Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland...
authorLinus Torvalds <torvalds@g5.osdl.org>
Tue, 20 Jun 2006 02:01:59 +0000 (19:01 -0700)
committerLinus Torvalds <torvalds@g5.osdl.org>
Tue, 20 Jun 2006 02:01:59 +0000 (19:01 -0700)
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband: (46 commits)
  IB/uverbs: Don't serialize with ib_uverbs_idr_mutex
  IB/mthca: Make all device methods truly reentrant
  IB/mthca: Fix memory leak on modify_qp error paths
  IB/uverbs: Factor out common idr code
  IB/uverbs: Don't decrement usecnt on error paths
  IB/uverbs: Release lock on error path
  IB/cm: Use address handle helpers
  IB/sa: Add ib_init_ah_from_path()
  IB: Add ib_init_ah_from_wc()
  IB/ucm: Get rid of duplicate P_Key parameter
  IB/srp: Factor out common request reset code
  IB/srp: Support SRP rev. 10 targets
  [SCSI] srp.h: Add I/O Class values
  IB/fmr: Use device's max_map_map_per_fmr attribute in FMR pool.
  IB/mthca: Fill in max_map_per_fmr device attribute
  IB/ipath: Add client reregister event generation
  IB/mthca: Add client reregister event generation
  IB: Move struct port_info from ipath to <rdma/ib_smi.h>
  IPoIB: Handle client reregister events
  IB: Add client reregister event type
  ...

373 files changed:
Documentation/kernel-parameters.txt
Documentation/networking/README.ipw2200
Documentation/networking/bonding.txt
Documentation/networking/ip-sysctl.txt
Documentation/networking/netdevices.txt
MAINTAINERS
arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c
arch/i386/kernel/cpu/cpufreq/cpufreq-nforce2.c
arch/i386/kernel/cpu/cpufreq/longhaul.c
arch/i386/kernel/cpu/cpufreq/longrun.c
arch/i386/kernel/cpu/cpufreq/powernow-k7.c
arch/i386/kernel/cpu/cpufreq/powernow-k8.c
arch/i386/kernel/cpu/cpufreq/powernow-k8.h
arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c
drivers/Kconfig
drivers/Makefile
drivers/acpi/pci_link.c
drivers/block/aoe/aoenet.c
drivers/char/agp/alpha-agp.c
drivers/char/agp/generic.c
drivers/char/agp/intel-agp.c
drivers/char/agp/uninorth-agp.c
drivers/connector/cn_queue.c
drivers/cpufreq/cpufreq.c
drivers/cpufreq/cpufreq_stats.c
drivers/cpufreq/freq_table.c
drivers/dma/Kconfig [new file with mode: 0644]
drivers/dma/Makefile [new file with mode: 0644]
drivers/dma/dmaengine.c [new file with mode: 0644]
drivers/dma/ioatdma.c [new file with mode: 0644]
drivers/dma/ioatdma.h [new file with mode: 0644]
drivers/dma/ioatdma_hw.h [new file with mode: 0644]
drivers/dma/ioatdma_io.h [new file with mode: 0644]
drivers/dma/ioatdma_registers.h [new file with mode: 0644]
drivers/dma/iovlock.c [new file with mode: 0644]
drivers/infiniband/ulp/ipoib/ipoib_multicast.c
drivers/media/dvb/dvb-core/dvb_net.c
drivers/net/3c501.c
drivers/net/3c503.c
drivers/net/3c505.c
drivers/net/3c507.c
drivers/net/3c523.c
drivers/net/3c527.c
drivers/net/8139cp.c
drivers/net/8139too.c
drivers/net/Kconfig
drivers/net/Makefile
drivers/net/au1000_eth.c
drivers/net/au1000_eth.h
drivers/net/bnx2.c
drivers/net/bnx2.h
drivers/net/bnx2_fw.h
drivers/net/bonding/bond_main.c
drivers/net/cassini.c
drivers/net/e100.c
drivers/net/e1000/Makefile
drivers/net/e1000/e1000.h
drivers/net/e1000/e1000_ethtool.c
drivers/net/e1000/e1000_hw.c
drivers/net/e1000/e1000_hw.h
drivers/net/e1000/e1000_main.c
drivers/net/e1000/e1000_osdep.h
drivers/net/e1000/e1000_param.c
drivers/net/epic100.c
drivers/net/forcedeth.c
drivers/net/hamradio/6pack.c
drivers/net/hamradio/mkiss.c
drivers/net/hp-plus.c
drivers/net/hp.c
drivers/net/ibmlana.c
drivers/net/ibmlana.h
drivers/net/ibmveth.c
drivers/net/ibmveth.h
drivers/net/ifb.c
drivers/net/irda/Kconfig
drivers/net/irda/Makefile
drivers/net/irda/ali-ircc.c
drivers/net/irda/irda-usb.c
drivers/net/irda/irda-usb.h
drivers/net/irda/mcs7780.c [new file with mode: 0644]
drivers/net/irda/mcs7780.h [new file with mode: 0644]
drivers/net/irda/stir4200.c
drivers/net/irda/vlsi_ir.c
drivers/net/ixgb/Makefile
drivers/net/ixgb/ixgb.h
drivers/net/ixgb/ixgb_ee.c
drivers/net/ixgb/ixgb_ee.h
drivers/net/ixgb/ixgb_ethtool.c
drivers/net/ixgb/ixgb_hw.c
drivers/net/ixgb/ixgb_hw.h
drivers/net/ixgb/ixgb_ids.h
drivers/net/ixgb/ixgb_main.c
drivers/net/ixgb/ixgb_osdep.h
drivers/net/ixgb/ixgb_param.c
drivers/net/mv643xx_eth.c
drivers/net/myri10ge/Makefile [new file with mode: 0644]
drivers/net/myri10ge/myri10ge.c [new file with mode: 0644]
drivers/net/myri10ge/myri10ge_mcp.h [new file with mode: 0644]
drivers/net/myri10ge/myri10ge_mcp_gen_header.h [new file with mode: 0644]
drivers/net/natsemi.c
drivers/net/ne.c
drivers/net/ne2.c
drivers/net/pcmcia/pcnet_cs.c
drivers/net/phy/Kconfig
drivers/net/phy/Makefile
drivers/net/phy/smsc.c [new file with mode: 0644]
drivers/net/ppp_generic.c
drivers/net/r8169.c
drivers/net/s2io-regs.h
drivers/net/s2io.c
drivers/net/s2io.h
drivers/net/sis900.c
drivers/net/sis900.h
drivers/net/skge.c
drivers/net/skge.h
drivers/net/smc-ultra.c
drivers/net/smc-ultra32.c
drivers/net/smc911x.c [new file with mode: 0644]
drivers/net/smc911x.h [new file with mode: 0644]
drivers/net/smc9194.c
drivers/net/smc91x.h
drivers/net/sungem_phy.c
drivers/net/tg3.c
drivers/net/tg3.h
drivers/net/tulip/de2104x.c
drivers/net/tulip/de4x5.c
drivers/net/tulip/de4x5.h
drivers/net/tulip/dmfe.c
drivers/net/tulip/eeprom.c
drivers/net/tulip/interrupt.c
drivers/net/tulip/media.c
drivers/net/tulip/tulip.h
drivers/net/tulip/tulip_core.c
drivers/net/tulip/uli526x.c
drivers/net/tulip/winbond-840.c
drivers/net/tulip/xircom_cb.c
drivers/net/via-velocity.c
drivers/net/via-velocity.h
drivers/net/wan/pci200syn.c
drivers/net/wireless/Kconfig
drivers/net/wireless/Makefile
drivers/net/wireless/airo.c
drivers/net/wireless/bcm43xx/bcm43xx.h
drivers/net/wireless/bcm43xx/bcm43xx_debugfs.c
drivers/net/wireless/bcm43xx/bcm43xx_main.c
drivers/net/wireless/hermes.c
drivers/net/wireless/hermes.h
drivers/net/wireless/hostap/hostap_80211_tx.c
drivers/net/wireless/hostap/hostap_ap.c
drivers/net/wireless/hostap/hostap_cs.c
drivers/net/wireless/hostap/hostap_main.c
drivers/net/wireless/ipw2200.c
drivers/net/wireless/ipw2200.h
drivers/net/wireless/orinoco.c
drivers/net/wireless/orinoco.h
drivers/net/wireless/orinoco_cs.c
drivers/net/wireless/orinoco_nortel.c
drivers/net/wireless/orinoco_pci.c
drivers/net/wireless/orinoco_pci.h [new file with mode: 0644]
drivers/net/wireless/orinoco_plx.c
drivers/net/wireless/orinoco_tmd.c
drivers/net/wireless/spectrum_cs.c
drivers/net/wireless/zd1201.c [moved from drivers/usb/net/zd1201.c with 97% similarity]
drivers/net/wireless/zd1201.h [moved from drivers/usb/net/zd1201.h with 100% similarity]
drivers/pci/pci.c
drivers/s390/net/Makefile
drivers/s390/net/ctcmain.c
drivers/s390/net/ctcmain.h
drivers/s390/net/ctctty.c [deleted file]
drivers/s390/net/ctctty.h [deleted file]
drivers/scsi/libata-core.c
drivers/usb/net/Kconfig
drivers/usb/net/Makefile
drivers/video/Kconfig
drivers/video/intelfb/intelfb.h
drivers/video/intelfb/intelfbdrv.c
drivers/video/intelfb/intelfbhw.c
drivers/video/intelfb/intelfbhw.h
include/linux/console.h
include/linux/dmaengine.h [new file with mode: 0644]
include/linux/igmp.h
include/linux/netdevice.h
include/linux/netfilter/nf_conntrack_common.h
include/linux/netfilter/nfnetlink_conntrack.h
include/linux/netfilter/xt_CONNSECMARK.h [new file with mode: 0644]
include/linux/netfilter/xt_SECMARK.h [new file with mode: 0644]
include/linux/netfilter/xt_quota.h [new file with mode: 0644]
include/linux/netfilter/xt_statistic.h [new file with mode: 0644]
include/linux/netfilter_ipv4/ip_conntrack.h
include/linux/netfilter_ipv4/ip_conntrack_h323.h
include/linux/netfilter_ipv4/ip_conntrack_helper_h323_types.h
include/linux/netfilter_ipv4/ip_conntrack_sip.h [new file with mode: 0644]
include/linux/pci.h
include/linux/pci_ids.h
include/linux/pfkeyv2.h
include/linux/security.h
include/linux/selinux.h
include/linux/skbuff.h
include/linux/sysctl.h
include/linux/tcp.h
include/linux/xfrm.h
include/net/ieee80211.h
include/net/ieee80211softmac.h
include/net/ieee80211softmac_wx.h
include/net/ip.h
include/net/llc_if.h
include/net/netdma.h [new file with mode: 0644]
include/net/netfilter/nf_conntrack.h
include/net/netfilter/nf_conntrack_compat.h
include/net/raw.h
include/net/sctp/sctp.h
include/net/sctp/structs.h
include/net/sock.h
include/net/tcp.h
include/net/xfrm.h
kernel/power/main.c
kernel/printk.c
net/Kconfig
net/atm/clip.c
net/bridge/Makefile
net/bridge/br.c
net/bridge/br_device.c
net/bridge/br_forward.c
net/bridge/br_if.c
net/bridge/br_netfilter.c
net/bridge/br_netlink.c [new file with mode: 0644]
net/bridge/br_notify.c
net/bridge/br_private.h
net/bridge/br_stp_if.c
net/core/Makefile
net/core/dev.c
net/core/dev_mcast.c
net/core/ethtool.c
net/core/netpoll.c
net/core/pktgen.c
net/core/skbuff.c
net/core/sock.c
net/core/user_dma.c [new file with mode: 0644]
net/dccp/proto.c
net/decnet/dn_nsp_in.c
net/decnet/dn_route.c
net/ieee80211/ieee80211_crypt_tkip.c
net/ieee80211/ieee80211_rx.c
net/ieee80211/ieee80211_tx.c
net/ieee80211/ieee80211_wx.c
net/ieee80211/softmac/Kconfig
net/ieee80211/softmac/ieee80211softmac_assoc.c
net/ieee80211/softmac/ieee80211softmac_auth.c
net/ieee80211/softmac/ieee80211softmac_event.c
net/ieee80211/softmac/ieee80211softmac_io.c
net/ieee80211/softmac/ieee80211softmac_module.c
net/ieee80211/softmac/ieee80211softmac_priv.h
net/ieee80211/softmac/ieee80211softmac_wx.c
net/ipv4/Kconfig
net/ipv4/Makefile
net/ipv4/ah4.c
net/ipv4/esp4.c
net/ipv4/icmp.c
net/ipv4/igmp.c
net/ipv4/ip_output.c
net/ipv4/ipcomp.c
net/ipv4/netfilter/Kconfig
net/ipv4/netfilter/Makefile
net/ipv4/netfilter/ip_conntrack_amanda.c
net/ipv4/netfilter/ip_conntrack_core.c
net/ipv4/netfilter/ip_conntrack_ftp.c
net/ipv4/netfilter/ip_conntrack_helper_h323.c
net/ipv4/netfilter/ip_conntrack_helper_h323_types.c
net/ipv4/netfilter/ip_conntrack_netlink.c
net/ipv4/netfilter/ip_conntrack_proto_gre.c
net/ipv4/netfilter/ip_conntrack_proto_icmp.c
net/ipv4/netfilter/ip_conntrack_proto_tcp.c
net/ipv4/netfilter/ip_conntrack_proto_udp.c
net/ipv4/netfilter/ip_conntrack_sip.c [new file with mode: 0644]
net/ipv4/netfilter/ip_conntrack_standalone.c
net/ipv4/netfilter/ip_nat_helper_h323.c
net/ipv4/netfilter/ip_nat_sip.c [new file with mode: 0644]
net/ipv4/netfilter/ip_nat_snmp_basic.c
net/ipv4/netfilter/ipt_CLUSTERIP.c
net/ipv4/netfilter/ipt_REJECT.c
net/ipv4/netfilter/ipt_hashlimit.c
net/ipv4/netfilter/ipt_recent.c
net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
net/ipv4/netfilter/nf_conntrack_proto_icmp.c
net/ipv4/raw.c
net/ipv4/sysctl_net_ipv4.c
net/ipv4/tcp.c
net/ipv4/tcp_bic.c
net/ipv4/tcp_compound.c [new file with mode: 0644]
net/ipv4/tcp_cong.c
net/ipv4/tcp_cubic.c
net/ipv4/tcp_highspeed.c
net/ipv4/tcp_htcp.c
net/ipv4/tcp_input.c
net/ipv4/tcp_ipv4.c
net/ipv4/tcp_lp.c [new file with mode: 0644]
net/ipv4/tcp_output.c
net/ipv4/tcp_probe.c [new file with mode: 0644]
net/ipv4/tcp_veno.c [new file with mode: 0644]
net/ipv4/tcp_westwood.c
net/ipv4/xfrm4_input.c
net/ipv4/xfrm4_mode_transport.c [new file with mode: 0644]
net/ipv4/xfrm4_mode_tunnel.c [new file with mode: 0644]
net/ipv4/xfrm4_output.c
net/ipv4/xfrm4_policy.c
net/ipv4/xfrm4_state.c
net/ipv6/Kconfig
net/ipv6/Makefile
net/ipv6/addrconf.c
net/ipv6/ah6.c
net/ipv6/esp6.c
net/ipv6/ip6_output.c
net/ipv6/ipcomp6.c
net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
net/ipv6/netfilter/nf_conntrack_reasm.c
net/ipv6/tcp_ipv6.c
net/ipv6/xfrm6_input.c
net/ipv6/xfrm6_mode_transport.c [new file with mode: 0644]
net/ipv6/xfrm6_mode_tunnel.c [new file with mode: 0644]
net/ipv6/xfrm6_output.c
net/ipv6/xfrm6_policy.c
net/ipv6/xfrm6_state.c
net/ipx/ipx_route.c
net/irda/irlmp.c
net/key/af_key.c
net/llc/af_llc.c
net/llc/llc_if.c
net/llc/llc_input.c
net/llc/llc_sap.c
net/netfilter/Kconfig
net/netfilter/Makefile
net/netfilter/nf_conntrack_core.c
net/netfilter/nf_conntrack_ftp.c
net/netfilter/nf_conntrack_netlink.c
net/netfilter/nf_conntrack_proto_tcp.c
net/netfilter/nf_conntrack_proto_udp.c
net/netfilter/nf_conntrack_standalone.c
net/netfilter/xt_CONNSECMARK.c [new file with mode: 0644]
net/netfilter/xt_SECMARK.c [new file with mode: 0644]
net/netfilter/xt_connmark.c
net/netfilter/xt_dccp.c
net/netfilter/xt_mark.c
net/netfilter/xt_multiport.c
net/netfilter/xt_quota.c [new file with mode: 0644]
net/netfilter/xt_sctp.c
net/netfilter/xt_statistic.c [new file with mode: 0644]
net/netfilter/xt_string.c
net/sched/sch_generic.c
net/sched/sch_teql.c
net/sctp/input.c
net/sctp/ipv6.c
net/sctp/output.c
net/sctp/outqueue.c
net/sctp/protocol.c
net/sctp/sm_statefuns.c
net/sctp/socket.c
net/sctp/ulpevent.c
net/xfrm/xfrm_policy.c
net/xfrm/xfrm_state.c
net/xfrm/xfrm_user.c
security/dummy.c
security/selinux/Kconfig
security/selinux/exports.c
security/selinux/hooks.c
security/selinux/include/av_inherit.h
security/selinux/include/av_perm_to_string.h
security/selinux/include/av_permissions.h
security/selinux/include/class_to_string.h
security/selinux/include/flask.h
security/selinux/include/xfrm.h
security/selinux/selinuxfs.c
security/selinux/xfrm.c

index b3a6187e53051866004696e4d5718cd85240e1c2..a9d3a1794b232da271d71178fbd05c3078e87723 100644 (file)
@@ -1402,6 +1402,15 @@ running once the system is up.
                        If enabled at boot time, /selinux/disable can be used
                        later to disable prior to initial policy load.
 
+       selinux_compat_net =
+                       [SELINUX] Set initial selinux_compat_net flag value.
+                        Format: { "0" | "1" }
+                        0 -- use new secmark-based packet controls
+                        1 -- use legacy packet controls
+                        Default value is 0 (preferred).
+                        Value can be changed at runtime via
+                        /selinux/compat_net.
+
        serialnumber    [BUGS=IA-32]
 
        sg_def_reserved_size=   [SCSI]
index acb30c5dcff333e11f9f9fd72957b71b3a9af02c..4f2a40f1dbc629837463a96695759e3848f41c68 100644 (file)
@@ -14,8 +14,8 @@ Copyright (C) 2004-2006, Intel Corporation
 
 README.ipw2200
 
-Version: 1.0.8
-Date   : October 20, 2005
+Version: 1.1.2
+Date   : March 30, 2006
 
 
 Index
@@ -103,7 +103,7 @@ file.
 
 1.1. Overview of Features
 -----------------------------------------------
-The current release (1.0.8) supports the following features:
+The current release (1.1.2) supports the following features:
 
 + BSS mode (Infrastructure, Managed)
 + IBSS mode (Ad-Hoc)
@@ -247,8 +247,8 @@ and can set the contents via echo.  For example:
 % cat /sys/bus/pci/drivers/ipw2200/debug_level
 
 Will report the current debug level of the driver's logging subsystem 
-(only available if CONFIG_IPW_DEBUG was configured when the driver was 
-built).
+(only available if CONFIG_IPW2200_DEBUG was configured when the driver
+was built).
 
 You can set the debug level via:
 
index 8d8b4e5ea184a26da6bfab62bed0f796ce16679a..afac780445cd19a9042a629fa32245d11806e610 100644 (file)
@@ -1,7 +1,7 @@
 
                Linux Ethernet Bonding Driver HOWTO
 
-               Latest update: 21 June 2005
+               Latest update: 24 April 2006
 
 Initial release : Thomas Davis <tadavis at lbl.gov>
 Corrections, HA extensions : 2000/10/03-15 :
@@ -12,6 +12,8 @@ Corrections, HA extensions : 2000/10/03-15 :
   - Jay Vosburgh <fubar at us dot ibm dot com>
 
 Reorganized and updated Feb 2005 by Jay Vosburgh
+Added Sysfs information: 2006/04/24
+  - Mitch Williams <mitch.a.williams at intel.com>
 
 Introduction
 ============
@@ -38,61 +40,62 @@ Table of Contents
 2. Bonding Driver Options
 
 3. Configuring Bonding Devices
-3.1    Configuration with sysconfig support
-3.1.1          Using DHCP with sysconfig
-3.1.2          Configuring Multiple Bonds with sysconfig
-3.2    Configuration with initscripts support
-3.2.1          Using DHCP with initscripts
-3.2.2          Configuring Multiple Bonds with initscripts
-3.3    Configuring Bonding Manually
+3.1    Configuration with Sysconfig Support
+3.1.1          Using DHCP with Sysconfig
+3.1.2          Configuring Multiple Bonds with Sysconfig
+3.2    Configuration with Initscripts Support
+3.2.1          Using DHCP with Initscripts
+3.2.2          Configuring Multiple Bonds with Initscripts
+3.3    Configuring Bonding Manually with Ifenslave
 3.3.1          Configuring Multiple Bonds Manually
+3.4    Configuring Bonding Manually via Sysfs
 
-5. Querying Bonding Configuration
-5.1    Bonding Configuration
-5.2    Network Configuration
+4. Querying Bonding Configuration
+4.1    Bonding Configuration
+4.2    Network Configuration
 
-6. Switch Configuration
+5. Switch Configuration
 
-7. 802.1q VLAN Support
+6. 802.1q VLAN Support
 
-8. Link Monitoring
-8.1    ARP Monitor Operation
-8.2    Configuring Multiple ARP Targets
-8.3    MII Monitor Operation
+7. Link Monitoring
+7.1    ARP Monitor Operation
+7.2    Configuring Multiple ARP Targets
+7.3    MII Monitor Operation
 
-9. Potential Trouble Sources
-9.1    Adventures in Routing
-9.2    Ethernet Device Renaming
-9.3    Painfully Slow Or No Failed Link Detection By Miimon
+8. Potential Trouble Sources
+8.1    Adventures in Routing
+8.2    Ethernet Device Renaming
+8.3    Painfully Slow Or No Failed Link Detection By Miimon
 
-10. SNMP agents
+9. SNMP agents
 
-11. Promiscuous mode
+10. Promiscuous mode
 
-12. Configuring Bonding for High Availability
-12.1   High Availability in a Single Switch Topology
-12.2   High Availability in a Multiple Switch Topology
-12.2.1         HA Bonding Mode Selection for Multiple Switch Topology
-12.2.2         HA Link Monitoring for Multiple Switch Topology
+11. Configuring Bonding for High Availability
+11.1   High Availability in a Single Switch Topology
+11.2   High Availability in a Multiple Switch Topology
+11.2.1         HA Bonding Mode Selection for Multiple Switch Topology
+11.2.2         HA Link Monitoring for Multiple Switch Topology
 
-13. Configuring Bonding for Maximum Throughput
-13.1   Maximum Throughput in a Single Switch Topology
-13.1.1         MT Bonding Mode Selection for Single Switch Topology
-13.1.2         MT Link Monitoring for Single Switch Topology
-13.2   Maximum Throughput in a Multiple Switch Topology
-13.2.1         MT Bonding Mode Selection for Multiple Switch Topology
-13.2.2         MT Link Monitoring for Multiple Switch Topology
+12. Configuring Bonding for Maximum Throughput
+12.1   Maximum Throughput in a Single Switch Topology
+12.1.1         MT Bonding Mode Selection for Single Switch Topology
+12.1.2         MT Link Monitoring for Single Switch Topology
+12.2   Maximum Throughput in a Multiple Switch Topology
+12.2.1         MT Bonding Mode Selection for Multiple Switch Topology
+12.2.2         MT Link Monitoring for Multiple Switch Topology
 
-14. Switch Behavior Issues
-14.1   Link Establishment and Failover Delays
-14.2   Duplicated Incoming Packets
+13. Switch Behavior Issues
+13.1   Link Establishment and Failover Delays
+13.2   Duplicated Incoming Packets
 
-15. Hardware Specific Considerations
-15.1   IBM BladeCenter
+14. Hardware Specific Considerations
+14.1   IBM BladeCenter
 
-16. Frequently Asked Questions
+15. Frequently Asked Questions
 
-17. Resources and Links
+16. Resources and Links
 
 
 1. Bonding Driver Installation
@@ -156,6 +159,9 @@ you're trying to build it for.  Some distros (e.g., Red Hat from 7.1
 onwards) do not have /usr/include/linux symbolically linked to the
 default kernel source include directory.
 
+SECOND IMPORTANT NOTE:
+       If you plan to configure bonding using sysfs, you do not need
+to use ifenslave.
 
 2. Bonding Driver Options
 =========================
@@ -270,7 +276,7 @@ mode
                In bonding version 2.6.2 or later, when a failover
                occurs in active-backup mode, bonding will issue one
                or more gratuitous ARPs on the newly active slave.
-               One gratutious ARP is issued for the bonding master
+               One gratuitous ARP is issued for the bonding master
                interface and each VLAN interfaces configured above
                it, provided that the interface has at least one IP
                address configured.  Gratuitous ARPs issued for VLAN
@@ -377,7 +383,7 @@ mode
                When a link is reconnected or a new slave joins the
                bond the receive traffic is redistributed among all
                active slaves in the bond by initiating ARP Replies
-               with the selected mac address to each of the
+               with the selected MAC address to each of the
                clients. The updelay parameter (detailed below) must
                be set to a value equal or greater than the switch's
                forwarding delay so that the ARP Replies sent to the
@@ -498,11 +504,12 @@ not exist, and the layer2 policy is the only policy.
 3. Configuring Bonding Devices
 ==============================
 
-       There are, essentially, two methods for configuring bonding:
-with support from the distro's network initialization scripts, and
-without.  Distros generally use one of two packages for the network
-initialization scripts: initscripts or sysconfig.  Recent versions of
-these packages have support for bonding, while older versions do not.
+       You can configure bonding using either your distro's network
+initialization scripts, or manually using either ifenslave or the
+sysfs interface.  Distros generally use one of two packages for the
+network initialization scripts: initscripts or sysconfig.  Recent
+versions of these packages have support for bonding, while older
+versions do not.
 
        We will first describe the options for configuring bonding for
 distros using versions of initscripts and sysconfig with full or
@@ -530,7 +537,7 @@ $ grep ifenslave /sbin/ifup
        If this returns any matches, then your initscripts or
 sysconfig has support for bonding.
 
-3.1 Configuration with sysconfig support
+3.1 Configuration with Sysconfig Support
 ----------------------------------------
 
        This section applies to distros using a version of sysconfig
@@ -538,7 +545,7 @@ with bonding support, for example, SuSE Linux Enterprise Server 9.
 
        SuSE SLES 9's networking configuration system does support
 bonding, however, at this writing, the YaST system configuration
-frontend does not provide any means to work with bonding devices.
+front end does not provide any means to work with bonding devices.
 Bonding devices can be managed by hand, however, as follows.
 
        First, if they have not already been configured, configure the
@@ -660,7 +667,7 @@ format can be found in an example ifcfg template file:
        Note that the template does not document the various BONDING_
 settings described above, but does describe many of the other options.
 
-3.1.1 Using DHCP with sysconfig
+3.1.1 Using DHCP with Sysconfig
 -------------------------------
 
        Under sysconfig, configuring a device with BOOTPROTO='dhcp'
@@ -670,7 +677,7 @@ attempt to obtain the device address from DHCP prior to adding any of
 the slave devices.  Without active slaves, the DHCP requests are not
 sent to the network.
 
-3.1.2 Configuring Multiple Bonds with sysconfig
+3.1.2 Configuring Multiple Bonds with Sysconfig
 -----------------------------------------------
 
        The sysconfig network initialization system is capable of
@@ -685,7 +692,7 @@ ifcfg-bondX files.
 options in the ifcfg-bondX file, it is not necessary to add them to
 the system /etc/modules.conf or /etc/modprobe.conf configuration file.
 
-3.2 Configuration with initscripts support
+3.2 Configuration with Initscripts Support
 ------------------------------------------
 
        This section applies to distros using a version of initscripts
@@ -756,7 +763,7 @@ options for your configuration.
 will restart the networking subsystem and your bond link should be now
 up and running.
 
-3.2.1 Using DHCP with initscripts
+3.2.1 Using DHCP with Initscripts
 ---------------------------------
 
        Recent versions of initscripts (the version supplied with
@@ -768,7 +775,7 @@ above, except replace the line "BOOTPROTO=none" with "BOOTPROTO=dhcp"
 and add a line consisting of "TYPE=Bonding".  Note that the TYPE value
 is case sensitive.
 
-3.2.2 Configuring Multiple Bonds with initscripts
+3.2.2 Configuring Multiple Bonds with Initscripts
 -------------------------------------------------
 
        At this writing, the initscripts package does not directly
@@ -784,8 +791,8 @@ Fedora Core kernels, and has been seen on RHEL 4 as well.  On kernels
 exhibiting this problem, it will be impossible to configure multiple
 bonds with differing parameters.
 
-3.3 Configuring Bonding Manually
---------------------------------
+3.3 Configuring Bonding Manually with Ifenslave
+-----------------------------------------------
 
        This section applies to distros whose network initialization
 scripts (the sysconfig or initscripts package) do not have specific
@@ -889,11 +896,139 @@ install bond1 /sbin/modprobe --ignore-install bonding -o bond1 \
        This may be repeated any number of times, specifying a new and
 unique name in place of bond1 for each subsequent instance.
 
+3.4 Configuring Bonding Manually via Sysfs
+------------------------------------------
+
+       Starting with version 3.0, Channel Bonding may be configured
+via the sysfs interface.  This interface allows dynamic configuration
+of all bonds in the system without unloading the module.  It also
+allows for adding and removing bonds at runtime.  Ifenslave is no
+longer required, though it is still supported.
+
+       Use of the sysfs interface allows you to use multiple bonds
+with different configurations without having to reload the module.
+It also allows you to use multiple, differently configured bonds when
+bonding is compiled into the kernel.
+
+       You must have the sysfs filesystem mounted to configure
+bonding this way.  The examples in this document assume that you
+are using the standard mount point for sysfs, e.g. /sys.  If your
+sysfs filesystem is mounted elsewhere, you will need to adjust the
+example paths accordingly.
+
+Creating and Destroying Bonds
+-----------------------------
+To add a new bond foo:
+# echo +foo > /sys/class/net/bonding_masters
+
+To remove an existing bond bar:
+# echo -bar > /sys/class/net/bonding_masters
+
+To show all existing bonds:
+# cat /sys/class/net/bonding_masters
+
+NOTE: due to 4K size limitation of sysfs files, this list may be
+truncated if you have more than a few hundred bonds.  This is unlikely
+to occur under normal operating conditions.
+
+Adding and Removing Slaves
+--------------------------
+       Interfaces may be enslaved to a bond using the file
+/sys/class/net/<bond>/bonding/slaves.  The semantics for this file
+are the same as for the bonding_masters file.
+
+To enslave interface eth0 to bond bond0:
+# ifconfig bond0 up
+# echo +eth0 > /sys/class/net/bond0/bonding/slaves
+
+To free slave eth0 from bond bond0:
+# echo -eth0 > /sys/class/net/bond0/bonding/slaves
+
+       NOTE: The bond must be up before slaves can be added.  All
+slaves are freed when the interface is brought down.
+
+       When an interface is enslaved to a bond, symlinks between the
+two are created in the sysfs filesystem.  In this case, you would get
+/sys/class/net/bond0/slave_eth0 pointing to /sys/class/net/eth0, and
+/sys/class/net/eth0/master pointing to /sys/class/net/bond0.
+
+       This means that you can tell quickly whether or not an
+interface is enslaved by looking for the master symlink.  Thus:
+# echo -eth0 > /sys/class/net/eth0/master/bonding/slaves
+will free eth0 from whatever bond it is enslaved to, regardless of
+the name of the bond interface.
+
+Changing a Bond's Configuration
+-------------------------------
+       Each bond may be configured individually by manipulating the
+files located in /sys/class/net/<bond name>/bonding
+
+       The names of these files correspond directly with the command-
+line parameters described elsewhere in in this file, and, with the
+exception of arp_ip_target, they accept the same values.  To see the
+current setting, simply cat the appropriate file.
+
+       A few examples will be given here; for specific usage
+guidelines for each parameter, see the appropriate section in this
+document.
+
+To configure bond0 for balance-alb mode:
+# ifconfig bond0 down
+# echo 6 > /sys/class/net/bond0/bonding/mode
+ - or -
+# echo balance-alb > /sys/class/net/bond0/bonding/mode
+       NOTE: The bond interface must be down before the mode can be
+changed.
+
+To enable MII monitoring on bond0 with a 1 second interval:
+# echo 1000 > /sys/class/net/bond0/bonding/miimon
+       NOTE: If ARP monitoring is enabled, it will disabled when MII
+monitoring is enabled, and vice-versa.
+
+To add ARP targets:
+# echo +192.168.0.100 > /sys/class/net/bond0/bonding/arp_ip_target
+# echo +192.168.0.101 > /sys/class/net/bond0/bonding/arp_ip_target
+       NOTE:  up to 10 target addresses may be specified.
+
+To remove an ARP target:
+# echo -192.168.0.100 > /sys/class/net/bond0/bonding/arp_ip_target
+
+Example Configuration
+---------------------
+       We begin with the same example that is shown in section 3.3,
+executed with sysfs, and without using ifenslave.
+
+       To make a simple bond of two e100 devices (presumed to be eth0
+and eth1), and have it persist across reboots, edit the appropriate
+file (/etc/init.d/boot.local or /etc/rc.d/rc.local), and add the
+following:
+
+modprobe bonding
+modprobe e100
+echo balance-alb > /sys/class/net/bond0/bonding/mode
+ifconfig bond0 192.168.1.1 netmask 255.255.255.0 up
+echo 100 > /sys/class/net/bond0/bonding/miimon
+echo +eth0 > /sys/class/net/bond0/bonding/slaves
+echo +eth1 > /sys/class/net/bond0/bonding/slaves
+
+       To add a second bond, with two e1000 interfaces in
+active-backup mode, using ARP monitoring, add the following lines to
+your init script:
+
+modprobe e1000
+echo +bond1 > /sys/class/net/bonding_masters
+echo active-backup > /sys/class/net/bond1/bonding/mode
+ifconfig bond1 192.168.2.1 netmask 255.255.255.0 up
+echo +192.168.2.100 /sys/class/net/bond1/bonding/arp_ip_target
+echo 2000 > /sys/class/net/bond1/bonding/arp_interval
+echo +eth2 > /sys/class/net/bond1/bonding/slaves
+echo +eth3 > /sys/class/net/bond1/bonding/slaves
+
 
-5. Querying Bonding Configuration 
+4. Querying Bonding Configuration 
 =================================
 
-5.1 Bonding Configuration
+4.1 Bonding Configuration
 -------------------------
 
        Each bonding device has a read-only file residing in the
@@ -923,7 +1058,7 @@ generally as follows:
        The precise format and contents will change depending upon the
 bonding configuration, state, and version of the bonding driver.
 
-5.2 Network configuration
+4.2 Network configuration
 -------------------------
 
        The network configuration can be inspected using the ifconfig
@@ -958,7 +1093,7 @@ eth1      Link encap:Ethernet  HWaddr 00:C0:F0:1F:37:B4
           collisions:0 txqueuelen:100
           Interrupt:9 Base address:0x1400
 
-6. Switch Configuration
+5. Switch Configuration
 =======================
 
        For this section, "switch" refers to whatever system the
@@ -991,7 +1126,7 @@ transmit policy for an EtherChannel group; all three will interoperate
 with another EtherChannel group.
 
 
-7. 802.1q VLAN Support
+6. 802.1q VLAN Support
 ======================
 
        It is possible to configure VLAN devices over a bond interface
@@ -1042,7 +1177,7 @@ underlying device -- i.e. the bonding interface -- to promiscuous
 mode, which might not be what you want.
 
 
-8. Link Monitoring
+7. Link Monitoring
 ==================
 
        The bonding driver at present supports two schemes for
@@ -1053,7 +1188,7 @@ monitor.
 bonding driver itself, it is not possible to enable both ARP and MII
 monitoring simultaneously.
 
-8.1 ARP Monitor Operation
+7.1 ARP Monitor Operation
 -------------------------
 
        The ARP monitor operates as its name suggests: it sends ARP
@@ -1071,7 +1206,7 @@ those slaves will stay down.  If networking monitoring (tcpdump, etc)
 shows the ARP requests and replies on the network, then it may be that
 your device driver is not updating last_rx and trans_start.
 
-8.2 Configuring Multiple ARP Targets
+7.2 Configuring Multiple ARP Targets
 ------------------------------------
 
        While ARP monitoring can be done with just one target, it can
@@ -1094,7 +1229,7 @@ alias bond0 bonding
 options bond0 arp_interval=60 arp_ip_target=192.168.0.100
 
 
-8.3 MII Monitor Operation
+7.3 MII Monitor Operation
 -------------------------
 
        The MII monitor monitors only the carrier state of the local
@@ -1120,14 +1255,14 @@ does not support or had some error in processing both the MII register
 and ethtool requests), then the MII monitor will assume the link is
 up.
 
-9. Potential Sources of Trouble
+8. Potential Sources of Trouble
 ===============================
 
-9.1 Adventures in Routing
+8.1 Adventures in Routing
 -------------------------
 
        When bonding is configured, it is important that the slave
-devices not have routes that supercede routes of the master (or,
+devices not have routes that supersede routes of the master (or,
 generally, not have routes at all).  For example, suppose the bonding
 device bond0 has two slaves, eth0 and eth1, and the routing table is
 as follows:
@@ -1154,11 +1289,11 @@ by the state of the routing table.
 
        The solution here is simply to insure that slaves do not have
 routes of their own, and if for some reason they must, those routes do
-not supercede routes of their master.  This should generally be the
+not supersede routes of their master.  This should generally be the
 case, but unusual configurations or errant manual or automatic static
 route additions may cause trouble.
 
-9.2 Ethernet Device Renaming
+8.2 Ethernet Device Renaming
 ----------------------------
 
        On systems with network configuration scripts that do not
@@ -1207,7 +1342,7 @@ modprobe with --ignore-install to cause the normal action to then take
 place.  Full documentation on this can be found in the modprobe.conf
 and modprobe manual pages.
 
-9.3. Painfully Slow Or No Failed Link Detection By Miimon
+8.3. Painfully Slow Or No Failed Link Detection By Miimon
 ---------------------------------------------------------
 
        By default, bonding enables the use_carrier option, which
@@ -1235,7 +1370,7 @@ carrier state.  It has no way to determine the state of devices on or
 beyond other ports of a switch, or if a switch is refusing to pass
 traffic while still maintaining carrier on.
 
-10. SNMP agents
+9. SNMP agents
 ===============
 
        If running SNMP agents, the bonding driver should be loaded
@@ -1281,7 +1416,7 @@ ifDescr, the association between the IP address and IfIndex remains
 and SNMP functions such as Interface_Scan_Next will report that
 association.
 
-11. Promiscuous mode
+10. Promiscuous mode
 ====================
 
        When running network monitoring tools, e.g., tcpdump, it is
@@ -1308,7 +1443,7 @@ sending to peers that are unassigned or if the load is unbalanced.
 the active slave changes (e.g., due to a link failure), the
 promiscuous setting will be propagated to the new active slave.
 
-12. Configuring Bonding for High Availability
+11. Configuring Bonding for High Availability
 =============================================
 
        High Availability refers to configurations that provide
@@ -1318,7 +1453,7 @@ goal is to provide the maximum availability of network connectivity
 (i.e., the network always works), even though other configurations
 could provide higher throughput.
 
-12.1 High Availability in a Single Switch Topology
+11.1 High Availability in a Single Switch Topology
 --------------------------------------------------
 
        If two hosts (or a host and a single switch) are directly
@@ -1332,7 +1467,7 @@ the load will be rebalanced across the remaining devices.
        See Section 13, "Configuring Bonding for Maximum Throughput"
 for information on configuring bonding with one peer device.
 
-12.2 High Availability in a Multiple Switch Topology
+11.2 High Availability in a Multiple Switch Topology
 ----------------------------------------------------
 
        With multiple switches, the configuration of bonding and the
@@ -1359,7 +1494,7 @@ switches (ISL, or inter switch link), and multiple ports connecting to
 the outside world ("port3" on each switch).  There is no technical
 reason that this could not be extended to a third switch.
 
-12.2.1 HA Bonding Mode Selection for Multiple Switch Topology
+11.2.1 HA Bonding Mode Selection for Multiple Switch Topology
 -------------------------------------------------------------
 
        In a topology such as the example above, the active-backup and
@@ -1381,7 +1516,7 @@ broadcast: This mode is really a special purpose mode, and is suitable
        necessary for some specific one-way traffic to reach both
        independent networks, then the broadcast mode may be suitable.
 
-12.2.2 HA Link Monitoring Selection for Multiple Switch Topology
+11.2.2 HA Link Monitoring Selection for Multiple Switch Topology
 ----------------------------------------------------------------
 
        The choice of link monitoring ultimately depends upon your
@@ -1402,10 +1537,10 @@ regardless of which switch is active, the ARP monitor has a suitable
 target to query.
 
 
-13. Configuring Bonding for Maximum Throughput
+12. Configuring Bonding for Maximum Throughput
 ==============================================
 
-13.1 Maximizing Throughput in a Single Switch Topology
+12.1 Maximizing Throughput in a Single Switch Topology
 ------------------------------------------------------
 
        In a single switch configuration, the best method to maximize
@@ -1476,7 +1611,7 @@ destination to make load balancing decisions.  The behavior of each
 mode is described below.
 
 
-13.1.1 MT Bonding Mode Selection for Single Switch Topology
+12.1.1 MT Bonding Mode Selection for Single Switch Topology
 -----------------------------------------------------------
 
        This configuration is the easiest to set up and to understand,
@@ -1607,7 +1742,7 @@ balance-alb: This mode is everything that balance-tlb is, and more.
        device driver must support changing the hardware address while
        the device is open.
 
-13.1.2 MT Link Monitoring for Single Switch Topology
+12.1.2 MT Link Monitoring for Single Switch Topology
 ----------------------------------------------------
 
        The choice of link monitoring may largely depend upon which
@@ -1616,7 +1751,7 @@ support the use of the ARP monitor, and are thus restricted to using
 the MII monitor (which does not provide as high a level of end to end
 assurance as the ARP monitor).
 
-13.2 Maximum Throughput in a Multiple Switch Topology
+12.2 Maximum Throughput in a Multiple Switch Topology
 -----------------------------------------------------
 
        Multiple switches may be utilized to optimize for throughput
@@ -1651,7 +1786,7 @@ a single 72 port switch.
 can be equipped with an additional network device connected to an
 external network; this host then additionally acts as a gateway.
 
-13.2.1 MT Bonding Mode Selection for Multiple Switch Topology
+12.2.1 MT Bonding Mode Selection for Multiple Switch Topology
 -------------------------------------------------------------
 
        In actual practice, the bonding mode typically employed in
@@ -1664,7 +1799,7 @@ packets has arrived).  When employed in this fashion, the balance-rr
 mode allows individual connections between two hosts to effectively
 utilize greater than one interface's bandwidth.
 
-13.2.2 MT Link Monitoring for Multiple Switch Topology
+12.2.2 MT Link Monitoring for Multiple Switch Topology
 ------------------------------------------------------
 
        Again, in actual practice, the MII monitor is most often used
@@ -1674,10 +1809,10 @@ advantages over the MII monitor are mitigated by the volume of probes
 needed as the number of systems involved grows (remember that each
 host in the network is configured with bonding).
 
-14. Switch Behavior Issues
+13. Switch Behavior Issues
 ==========================
 
-14.1 Link Establishment and Failover Delays
+13.1 Link Establishment and Failover Delays
 -------------------------------------------
 
        Some switches exhibit undesirable behavior with regard to the
@@ -1712,7 +1847,7 @@ switches take a long time to go into backup mode, it may be desirable
 to not activate a backup interface immediately after a link goes down.
 Failover may be delayed via the downdelay bonding module option.
 
-14.2 Duplicated Incoming Packets
+13.2 Duplicated Incoming Packets
 --------------------------------
 
        It is not uncommon to observe a short burst of duplicated
@@ -1751,14 +1886,14 @@ behavior, it can be induced by clearing the MAC forwarding table (on
 most Cisco switches, the privileged command "clear mac address-table
 dynamic" will accomplish this).
 
-15. Hardware Specific Considerations
+14. Hardware Specific Considerations
 ====================================
 
        This section contains additional information for configuring
 bonding on specific hardware platforms, or for interfacing bonding
 with particular switches or other devices.
 
-15.1 IBM BladeCenter
+14.1 IBM BladeCenter
 --------------------
 
        This applies to the JS20 and similar systems.
@@ -1861,7 +1996,7 @@ bonding driver.
 avoid fail-over delay issues when using bonding.
 
        
-16. Frequently Asked Questions
+15. Frequently Asked Questions
 ==============================
 
 1.  Is it SMP safe?
@@ -1925,7 +2060,7 @@ not have special switch requirements, but do need device drivers that
 support specific features (described in the appropriate section under
 module parameters, above).
 
-       In 802.3ad mode, it works with with systems that support IEEE
+       In 802.3ad mode, it works with systems that support IEEE
 802.3ad Dynamic Link Aggregation.  Most managed and many unmanaged
 switches currently available support 802.3ad.
 
index f12007b80a466e33fad7efb03dfe7e4f20913462..d46338af6002997b0593d4875a6aee825c77126f 100644 (file)
@@ -362,6 +362,13 @@ tcp_workaround_signed_windows - BOOLEAN
        not receive a window scaling option from them.
        Default: 0
 
+tcp_slow_start_after_idle - BOOLEAN
+       If set, provide RFC2861 behavior and time out the congestion
+       window after an idle period.  An idle period is defined at
+       the current RTO.  If unset, the congestion window will not
+       be timed out after an idle period.
+       Default: 1
+
 IP Variables:
 
 ip_local_port_range - 2 INTEGERS
index 3c0a5ba614d7c39146668f0b990e3fab03cceb06..847cedb238f6c0ed24ad6568b1d440bc3843fc6f 100644 (file)
@@ -42,9 +42,9 @@ dev->get_stats:
        Context: nominally process, but don't sleep inside an rwlock
 
 dev->hard_start_xmit:
-       Synchronization: dev->xmit_lock spinlock.
+       Synchronization: netif_tx_lock spinlock.
        When the driver sets NETIF_F_LLTX in dev->features this will be
-       called without holding xmit_lock. In this case the driver 
+       called without holding netif_tx_lock. In this case the driver
        has to lock by itself when needed. It is recommended to use a try lock
        for this and return -1 when the spin lock fails. 
        The locking there should also properly protect against 
@@ -62,12 +62,12 @@ dev->hard_start_xmit:
          Only valid when NETIF_F_LLTX is set.
 
 dev->tx_timeout:
-       Synchronization: dev->xmit_lock spinlock.
+       Synchronization: netif_tx_lock spinlock.
        Context: BHs disabled
        Notes: netif_queue_stopped() is guaranteed true
 
 dev->set_multicast_list:
-       Synchronization: dev->xmit_lock spinlock.
+       Synchronization: netif_tx_lock spinlock.
        Context: BHs disabled
 
 dev->poll:
index c3c5842402df553bc497a594fac2b30b7d4a3b84..1421f74b6009ffcaa44081f0843b12f92a55a243 100644 (file)
@@ -1425,6 +1425,8 @@ P:        Jesse Brandeburg
 M:     jesse.brandeburg@intel.com
 P:     Jeff Kirsher
 M:     jeffrey.t.kirsher@intel.com
+P:     Auke Kok
+M:     auke-jan.h.kok@intel.com
 W:     http://sourceforge.net/projects/e1000/
 S:     Supported
 
@@ -1437,6 +1439,8 @@ P:        Jesse Brandeburg
 M:     jesse.brandeburg@intel.com
 P:     Jeff Kirsher
 M:     jeffrey.t.kirsher@intel.com
+P:     Auke Kok
+M:     auke-jan.h.kok@intel.com
 W:     http://sourceforge.net/projects/e1000/
 S:     Supported
 
@@ -1449,6 +1453,8 @@ P:        John Ronciak
 M:     john.ronciak@intel.com
 P:     Jesse Brandeburg
 M:     jesse.brandeburg@intel.com
+P:     Auke Kok
+M:     auke-jan.h.kok@intel.com
 W:     http://sourceforge.net/projects/e1000/
 S:     Supported
 
index 3852d0a4c1b5e2f35dc168399d12b97882ca8938..1a7bdcef19261deff5a7ea8ee13d5a8ddb434a19 100644 (file)
@@ -104,7 +104,7 @@ acpi_processor_set_performance (
 {
        u16                     port = 0;
        u8                      bit_width = 0;
-       int                     ret = 0;
+       int                     ret;
        u32                     value = 0;
        int                     i = 0;
        struct cpufreq_freqs    cpufreq_freqs;
@@ -195,7 +195,6 @@ acpi_processor_set_performance (
                        udelay(10);
                }
        } else {
-               i = 0;
                value = (u32) data->acpi_data.states[state].status;
        }
 
@@ -444,14 +443,15 @@ static struct freq_attr* acpi_cpufreq_attr[] = {
 };
 
 static struct cpufreq_driver acpi_cpufreq_driver = {
-       .verify         = acpi_cpufreq_verify,
-       .target         = acpi_cpufreq_target,
-       .init           = acpi_cpufreq_cpu_init,
-       .exit           = acpi_cpufreq_cpu_exit,
-       .resume         = acpi_cpufreq_resume,
-       .name           = "acpi-cpufreq",
-       .owner          = THIS_MODULE,
-       .attr           = acpi_cpufreq_attr,
+       .verify = acpi_cpufreq_verify,
+       .target = acpi_cpufreq_target,
+       .init   = acpi_cpufreq_cpu_init,
+       .exit   = acpi_cpufreq_cpu_exit,
+       .resume = acpi_cpufreq_resume,
+       .name   = "acpi-cpufreq",
+       .owner  = THIS_MODULE,
+       .attr   = acpi_cpufreq_attr,
+       .flags  = CPUFREQ_STICKY,
 };
 
 
index f275e0d4aee525a5868963e6e8240a2b531cb2cd..0d49d73d1b711274387f6f23988415073774bc78 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * (C) 2004  Sebastian Witt <se.witt@gmx.net>
+ * (C) 2004-2006  Sebastian Witt <se.witt@gmx.net>
  *
  *  Licensed under the terms of the GNU GPL License version 2.
  *  Based upon reverse engineered information
@@ -90,7 +90,7 @@ static int nforce2_calc_pll(unsigned int fsb)
 
        /* Try to calculate multiplier and divider up to 4 times */
        while (((mul == 0) || (div == 0)) && (tried <= 3)) {
-               for (xdiv = 1; xdiv <= 0x80; xdiv++)
+               for (xdiv = 2; xdiv <= 0x80; xdiv++)
                        for (xmul = 1; xmul <= 0xfe; xmul++)
                                if (nforce2_calc_fsb(NFORCE2_PLL(xmul, xdiv)) ==
                                    fsb + tried) {
@@ -117,8 +117,7 @@ static void nforce2_write_pll(int pll)
        int temp;
 
        /* Set the pll addr. to 0x00 */
-       temp = 0x00;
-       pci_write_config_dword(nforce2_chipset_dev, NFORCE2_PLLADR, temp);
+       pci_write_config_dword(nforce2_chipset_dev, NFORCE2_PLLADR, 0);
 
        /* Now write the value in all 64 registers */
        for (temp = 0; temp <= 0x3f; temp++)
@@ -266,7 +265,7 @@ static int nforce2_target(struct cpufreq_policy *policy,
        if (freqs.old == freqs.new)
                return 0;
 
-       dprintk(KERN_INFO "cpufreq: Old CPU frequency %d kHz, new %d kHz\n",
+       dprintk("Old CPU frequency %d kHz, new %d kHz\n",
               freqs.old, freqs.new);
 
        cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
@@ -278,7 +277,7 @@ static int nforce2_target(struct cpufreq_policy *policy,
                printk(KERN_ERR "cpufreq: Changing FSB to %d failed\n",
                        target_fsb);
        else
-               dprintk(KERN_INFO "cpufreq: Changed FSB successfully to %d\n",
+               dprintk("Changed FSB successfully to %d\n",
                        target_fsb);
 
        /* Enable IRQs */
index 8ef38544453c24cb8488d3202ed23750890d4566..146f607e9c44addc0e5cbb3856b2763cadd46173 100644 (file)
@@ -77,13 +77,17 @@ static char speedbuffer[8];
 
 static char *print_speed(int speed)
 {
-       if (speed > 1000) {
-               if (speed%1000 == 0)
-                       sprintf (speedbuffer, "%dGHz", speed/1000);
-               else
-                       sprintf (speedbuffer, "%d.%dGHz", speed/1000, (speed%1000)/100);
-       } else
-               sprintf (speedbuffer, "%dMHz", speed);
+       if (speed < 1000) {
+               snprintf(speedbuffer, sizeof(speedbuffer),"%dMHz", speed);
+               return speedbuffer;
+       }
+
+       if (speed%1000 == 0)
+               snprintf(speedbuffer, sizeof(speedbuffer),
+                       "%dGHz", speed/1000);
+       else
+               snprintf(speedbuffer, sizeof(speedbuffer),
+                       "%d.%dGHz", speed/1000, (speed%1000)/100);
 
        return speedbuffer;
 }
@@ -675,7 +679,7 @@ static int __init longhaul_init(void)
 
 static void __exit longhaul_exit(void)
 {
-       int i=0;
+       int i;
 
        for (i=0; i < numscales; i++) {
                if (clock_ratio[i] == maxmult) {
index e3868de4dc2e9c6bdb1a0ab74fef819b926ea195..b2689514295ab361057623dd7c2afb56d7deb81e 100644 (file)
@@ -223,7 +223,6 @@ static unsigned int __init longrun_determine_freqs(unsigned int *low_freq,
                /* set to 0 to try_hi perf_pctg */
                msr_lo &= 0xFFFFFF80;
                msr_hi &= 0xFFFFFF80;
-               msr_lo |= 0;
                msr_hi |= try_hi;
                wrmsr(MSR_TMTA_LONGRUN_CTRL, msr_lo, msr_hi);
 
index 2bf4237cb94ea85e8bd3d39ee4242643e1fdf3b8..694d4793bf6ae5f1aaf1bddae150e958f572fecf 100644 (file)
@@ -452,23 +452,23 @@ static int powernow_decode_bios (int maxfid, int startvid)
 
                        pst = (struct pst_s *) p;
 
-                       for (i = 0 ; i <psb->numpst; i++) {
+                       for (j=0; j<psb->numpst; j++) {
                                pst = (struct pst_s *) p;
                                number_scales = pst->numpstates;
 
                                if ((etuple == pst->cpuid) && check_fsb(pst->fsbspeed) &&
                                    (maxfid==pst->maxfid) && (startvid==pst->startvid))
                                {
-                                       dprintk ("PST:%d (@%p)\n", i, pst);
+                                       dprintk ("PST:%d (@%p)\n", j, pst);
                                        dprintk (" cpuid: 0x%x  fsb: %d  maxFID: 0x%x  startvid: 0x%x\n",
                                                 pst->cpuid, pst->fsbspeed, pst->maxfid, pst->startvid);
 
                                        ret = get_ranges ((char *) pst + sizeof (struct pst_s));
                                        return ret;
-
                                } else {
+                                       unsigned int k;
                                        p = (char *) pst + sizeof (struct pst_s);
-                                       for (j=0 ; j < number_scales; j++)
+                                       for (k=0; k<number_scales; k++)
                                                p+=2;
                                }
                        }
@@ -581,10 +581,7 @@ static int __init powernow_cpu_init (struct cpufreq_policy *policy)
 
        rdmsrl (MSR_K7_FID_VID_STATUS, fidvidstatus.val);
 
-       /* recalibrate cpu_khz */
-       result = recalibrate_cpu_khz();
-       if (result)
-               return result;
+       recalibrate_cpu_khz();
 
        fsb = (10 * cpu_khz) / fid_codes[fidvidstatus.bits.CFID];
        if (!fsb) {
index 71fffa17442530949dccf45aa210c2e371188a59..b4277f58f40c395a575963c2d4726b364a597aa9 100644 (file)
@@ -1,5 +1,5 @@
 /*
- *   (c) 2003, 2004, 2005 Advanced Micro Devices, Inc.
+ *   (c) 2003-2006 Advanced Micro Devices, Inc.
  *  Your use of this code is subject to the terms and conditions of the
  *  GNU general public license version 2. See "COPYING" or
  *  http://www.gnu.org/licenses/gpl.html
  *  Based upon datasheets & sample CPUs kindly provided by AMD.
  *
  *  Valuable input gratefully received from Dave Jones, Pavel Machek,
- *  Dominik Brodowski, and others.
+ *  Dominik Brodowski, Jacob Shin, and others.
  *  Originally developed by Paul Devriendt.
  *  Processor information obtained from Chapter 9 (Power and Thermal Management)
  *  of the "BIOS and Kernel Developer's Guide for the AMD Athlon 64 and AMD
  *  Opteron Processors" available for download from www.amd.com
  *
- *  Tables for specific CPUs can be infrerred from
+ *  Tables for specific CPUs can be inferred from
  *     http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/30430.pdf
  */
 
@@ -46,7 +46,7 @@
 
 #define PFX "powernow-k8: "
 #define BFX PFX "BIOS error: "
-#define VERSION "version 1.60.2"
+#define VERSION "version 2.00.00"
 #include "powernow-k8.h"
 
 /* serialize freq changes  */
@@ -54,6 +54,8 @@ static DEFINE_MUTEX(fidvid_mutex);
 
 static struct powernow_k8_data *powernow_data[NR_CPUS];
 
+static int cpu_family = CPU_OPTERON;
+
 #ifndef CONFIG_SMP
 static cpumask_t cpu_core_map[1];
 #endif
@@ -64,16 +66,36 @@ static u32 find_freq_from_fid(u32 fid)
        return 800 + (fid * 100);
 }
 
+
 /* Return a frequency in KHz, given an input fid */
 static u32 find_khz_freq_from_fid(u32 fid)
 {
        return 1000 * find_freq_from_fid(fid);
 }
 
-/* Return a voltage in miliVolts, given an input vid */
-static u32 find_millivolts_from_vid(struct powernow_k8_data *data, u32 vid)
+/* Return a frequency in MHz, given an input fid and did */
+static u32 find_freq_from_fiddid(u32 fid, u32 did)
+{
+       return 100 * (fid + 0x10) >> did;
+}
+
+static u32 find_khz_freq_from_fiddid(u32 fid, u32 did)
 {
-       return 1550-vid*25;
+       return 1000 * find_freq_from_fiddid(fid, did);
+}
+
+static u32 find_fid_from_pstate(u32 pstate)
+{
+       u32 hi, lo;
+       rdmsr(MSR_PSTATE_DEF_BASE + pstate, lo, hi);
+       return lo & HW_PSTATE_FID_MASK;
+}
+
+static u32 find_did_from_pstate(u32 pstate)
+{
+       u32 hi, lo;
+       rdmsr(MSR_PSTATE_DEF_BASE + pstate, lo, hi);
+       return (lo & HW_PSTATE_DID_MASK) >> HW_PSTATE_DID_SHIFT;
 }
 
 /* Return the vco fid for an input fid
@@ -98,6 +120,9 @@ static int pending_bit_stuck(void)
 {
        u32 lo, hi;
 
+       if (cpu_family)
+               return 0;
+
        rdmsr(MSR_FIDVID_STATUS, lo, hi);
        return lo & MSR_S_LO_CHANGE_PENDING ? 1 : 0;
 }
@@ -111,6 +136,14 @@ static int query_current_values_with_pending_wait(struct powernow_k8_data *data)
        u32 lo, hi;
        u32 i = 0;
 
+       if (cpu_family) {
+               rdmsr(MSR_PSTATE_STATUS, lo, hi);
+               i = lo & HW_PSTATE_MASK;
+               rdmsr(MSR_PSTATE_DEF_BASE + i, lo, hi);
+               data->currfid = lo & HW_PSTATE_FID_MASK;
+               data->currdid = (lo & HW_PSTATE_DID_MASK) >> HW_PSTATE_DID_SHIFT;
+               return 0;
+       }
        do {
                if (i++ > 10000) {
                        dprintk("detected change pending stuck\n");
@@ -175,7 +208,7 @@ static int write_new_fid(struct powernow_k8_data *data, u32 fid)
        do {
                wrmsr(MSR_FIDVID_CTL, lo, data->plllock * PLL_LOCK_CONVERSION);
                if (i++ > 100) {
-                       printk(KERN_ERR PFX "internal error - pending bit very stuck - no further pstate changes possible\n");
+                       printk(KERN_ERR PFX "Hardware error - pending bit very stuck - no further pstate changes possible\n");
                        return 1;
                }
        } while (query_current_values_with_pending_wait(data));
@@ -255,7 +288,15 @@ static int decrease_vid_code_by_step(struct powernow_k8_data *data, u32 reqvid,
        return 0;
 }
 
-/* Change the fid and vid, by the 3 phases. */
+/* Change hardware pstate by single MSR write */
+static int transition_pstate(struct powernow_k8_data *data, u32 pstate)
+{
+       wrmsr(MSR_PSTATE_CTRL, pstate, 0);
+       data->currfid = find_fid_from_pstate(pstate);
+       return 0;
+}
+
+/* Change Opteron/Athlon64 fid and vid, by the 3 phases. */
 static int transition_fid_vid(struct powernow_k8_data *data, u32 reqfid, u32 reqvid)
 {
        if (core_voltage_pre_transition(data, reqvid))
@@ -474,26 +515,35 @@ static int check_supported_cpu(unsigned int cpu)
                goto out;
 
        eax = cpuid_eax(CPUID_PROCESSOR_SIGNATURE);
-       if ((eax & CPUID_XFAM) != CPUID_XFAM_K8)
+       if (((eax & CPUID_XFAM) != CPUID_XFAM_K8) &&
+           ((eax & CPUID_XFAM) < CPUID_XFAM_10H))
                goto out;
 
-       if (((eax & CPUID_USE_XFAM_XMOD) != CPUID_USE_XFAM_XMOD) ||
-           ((eax & CPUID_XMOD) > CPUID_XMOD_REV_G)) {
-               printk(KERN_INFO PFX "Processor cpuid %x not supported\n", eax);
-               goto out;
-       }
+       if ((eax & CPUID_XFAM) == CPUID_XFAM_K8) {
+               if (((eax & CPUID_USE_XFAM_XMOD) != CPUID_USE_XFAM_XMOD) ||
+                   ((eax & CPUID_XMOD) > CPUID_XMOD_REV_G)) {
+                       printk(KERN_INFO PFX "Processor cpuid %x not supported\n", eax);
+                       goto out;
+               }
 
-       eax = cpuid_eax(CPUID_GET_MAX_CAPABILITIES);
-       if (eax < CPUID_FREQ_VOLT_CAPABILITIES) {
-               printk(KERN_INFO PFX
-                      "No frequency change capabilities detected\n");
-               goto out;
-       }
+               eax = cpuid_eax(CPUID_GET_MAX_CAPABILITIES);
+               if (eax < CPUID_FREQ_VOLT_CAPABILITIES) {
+                       printk(KERN_INFO PFX
+                              "No frequency change capabilities detected\n");
+                       goto out;
+               }
 
-       cpuid(CPUID_FREQ_VOLT_CAPABILITIES, &eax, &ebx, &ecx, &edx);
-       if ((edx & P_STATE_TRANSITION_CAPABLE) != P_STATE_TRANSITION_CAPABLE) {
-               printk(KERN_INFO PFX "Power state transitions not supported\n");
-               goto out;
+               cpuid(CPUID_FREQ_VOLT_CAPABILITIES, &eax, &ebx, &ecx, &edx);
+               if ((edx & P_STATE_TRANSITION_CAPABLE) != P_STATE_TRANSITION_CAPABLE) {
+                       printk(KERN_INFO PFX "Power state transitions not supported\n");
+                       goto out;
+               }
+       } else { /* must be a HW Pstate capable processor */
+               cpuid(CPUID_FREQ_VOLT_CAPABILITIES, &eax, &ebx, &ecx, &edx);
+               if ((edx & USE_HW_PSTATE) == USE_HW_PSTATE)
+                       cpu_family = CPU_HW_PSTATE;
+               else
+                       goto out;
        }
 
        rc = 1;
@@ -547,12 +597,18 @@ static void print_basics(struct powernow_k8_data *data)
 {
        int j;
        for (j = 0; j < data->numps; j++) {
-               if (data->powernow_table[j].frequency != CPUFREQ_ENTRY_INVALID)
-                       printk(KERN_INFO PFX "   %d : fid 0x%x (%d MHz), vid 0x%x (%d mV)\n", j,
+               if (data->powernow_table[j].frequency != CPUFREQ_ENTRY_INVALID) {
+                       if (cpu_family) {
+                       printk(KERN_INFO PFX "   %d : fid 0x%x gid 0x%x (%d MHz)\n", j, (data->powernow_table[j].index & 0xff00) >> 8,
+                               (data->powernow_table[j].index & 0xff0000) >> 16,
+                               data->powernow_table[j].frequency/1000);
+                       } else {
+                       printk(KERN_INFO PFX "   %d : fid 0x%x (%d MHz), vid 0x%x\n", j,
                                data->powernow_table[j].index & 0xff,
                                data->powernow_table[j].frequency/1000,
-                               data->powernow_table[j].index >> 8,
-                               find_millivolts_from_vid(data, data->powernow_table[j].index >> 8));
+                               data->powernow_table[j].index >> 8);
+                       }
+               }
        }
        if (data->batps)
                printk(KERN_INFO PFX "Only %d pstates on battery\n", data->batps);
@@ -702,7 +758,7 @@ static int find_psb_table(struct powernow_k8_data *data)
 #ifdef CONFIG_X86_POWERNOW_K8_ACPI
 static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index)
 {
-       if (!data->acpi_data.state_count)
+       if (!data->acpi_data.state_count || cpu_family)
                return;
 
        data->irt = (data->acpi_data.states[index].control >> IRT_SHIFT) & IRT_MASK;
@@ -715,9 +771,8 @@ static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned
 
 static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data)
 {
-       int i;
-       int cntlofreq = 0;
        struct cpufreq_frequency_table *powernow_table;
+       int ret_val;
 
        if (acpi_processor_register_performance(&data->acpi_data, data->cpu)) {
                dprintk("register performance failed: bad ACPI data\n");
@@ -746,6 +801,85 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data)
                goto err_out;
        }
 
+       if (cpu_family)
+               ret_val = fill_powernow_table_pstate(data, powernow_table);
+       else
+               ret_val = fill_powernow_table_fidvid(data, powernow_table);
+       if (ret_val)
+               goto err_out_mem;
+
+       powernow_table[data->acpi_data.state_count].frequency = CPUFREQ_TABLE_END;
+       powernow_table[data->acpi_data.state_count].index = 0;
+       data->powernow_table = powernow_table;
+
+       /* fill in data */
+       data->numps = data->acpi_data.state_count;
+       print_basics(data);
+       powernow_k8_acpi_pst_values(data, 0);
+
+       /* notify BIOS that we exist */
+       acpi_processor_notify_smm(THIS_MODULE);
+
+       return 0;
+
+err_out_mem:
+       kfree(powernow_table);
+
+err_out:
+       acpi_processor_unregister_performance(&data->acpi_data, data->cpu);
+
+       /* data->acpi_data.state_count informs us at ->exit() whether ACPI was used */
+       data->acpi_data.state_count = 0;
+
+       return -ENODEV;
+}
+
+static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table)
+{
+       int i;
+
+       for (i = 0; i < data->acpi_data.state_count; i++) {
+               u32 index;
+               u32 hi = 0, lo = 0;
+               u32 fid;
+               u32 did;
+
+               index = data->acpi_data.states[i].control & HW_PSTATE_MASK;
+               if (index > MAX_HW_PSTATE) {
+                       printk(KERN_ERR PFX "invalid pstate %d - bad value %d.\n", i, index);
+                       printk(KERN_ERR PFX "Please report to BIOS manufacturer\n");
+               }
+               rdmsr(MSR_PSTATE_DEF_BASE + index, lo, hi);
+               if (!(hi & HW_PSTATE_VALID_MASK)) {
+                       dprintk("invalid pstate %d, ignoring\n", index);
+                       powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID;
+                       continue;
+               }
+
+               fid = lo & HW_PSTATE_FID_MASK;
+               did = (lo & HW_PSTATE_DID_MASK) >> HW_PSTATE_DID_SHIFT;
+
+               dprintk("   %d : fid 0x%x, did 0x%x\n", index, fid, did);
+
+               powernow_table[i].index = index | (fid << HW_FID_INDEX_SHIFT) | (did << HW_DID_INDEX_SHIFT);
+
+               powernow_table[i].frequency = find_khz_freq_from_fiddid(fid, did);
+
+               if (powernow_table[i].frequency != (data->acpi_data.states[i].core_frequency * 1000)) {
+                       printk(KERN_INFO PFX "invalid freq entries %u kHz vs. %u kHz\n",
+                               powernow_table[i].frequency,
+                               (unsigned int) (data->acpi_data.states[i].core_frequency * 1000));
+                       powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID;
+                       continue;
+               }
+       }
+       return 0;
+}
+
+static int fill_powernow_table_fidvid(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table)
+{
+       int i;
+       int cntlofreq = 0;
        for (i = 0; i < data->acpi_data.state_count; i++) {
                u32 fid;
                u32 vid;
@@ -786,7 +920,7 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data)
                                if ((powernow_table[i].frequency != powernow_table[cntlofreq].frequency) ||
                                    (powernow_table[i].index != powernow_table[cntlofreq].index)) {
                                        printk(KERN_ERR PFX "Too many lo freq table entries\n");
-                                       goto err_out_mem;
+                                       return 1;
                                }
 
                                dprintk("double low frequency table entry, ignoring it.\n");
@@ -804,31 +938,7 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data)
                        continue;
                }
        }
-
-       powernow_table[data->acpi_data.state_count].frequency = CPUFREQ_TABLE_END;
-       powernow_table[data->acpi_data.state_count].index = 0;
-       data->powernow_table = powernow_table;
-
-       /* fill in data */
-       data->numps = data->acpi_data.state_count;
-       print_basics(data);
-       powernow_k8_acpi_pst_values(data, 0);
-
-       /* notify BIOS that we exist */
-       acpi_processor_notify_smm(THIS_MODULE);
-
        return 0;
-
-err_out_mem:
-       kfree(powernow_table);
-
-err_out:
-       acpi_processor_unregister_performance(&data->acpi_data, data->cpu);
-
-       /* data->acpi_data.state_count informs us at ->exit() whether ACPI was used */
-       data->acpi_data.state_count = 0;
-
-       return -ENODEV;
 }
 
 static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data)
@@ -844,20 +954,20 @@ static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned
 #endif /* CONFIG_X86_POWERNOW_K8_ACPI */
 
 /* Take a frequency, and issue the fid/vid transition command */
-static int transition_frequency(struct powernow_k8_data *data, unsigned int index)
+static int transition_frequency_fidvid(struct powernow_k8_data *data, unsigned int index)
 {
-       u32 fid;
-       u32 vid;
+       u32 fid = 0;
+       u32 vid = 0;
        int res, i;
        struct cpufreq_freqs freqs;
 
        dprintk("cpu %d transition to index %u\n", smp_processor_id(), index);
 
+       /* fid/vid correctness check for k8 */
        /* fid are the lower 8 bits of the index we stored into
-        * the cpufreq frequency table in find_psb_table, vid are
-        * the upper 8 bits.
+        * the cpufreq frequency table in find_psb_table, vid
+        * are the upper 8 bits.
         */
-
        fid = data->powernow_table[index].index & 0xFF;
        vid = (data->powernow_table[index].index & 0xFF00) >> 8;
 
@@ -881,22 +991,58 @@ static int transition_frequency(struct powernow_k8_data *data, unsigned int inde
 
        dprintk("cpu %d, changing to fid 0x%x, vid 0x%x\n",
                smp_processor_id(), fid, vid);
-
-       freqs.cpu = data->cpu;
        freqs.old = find_khz_freq_from_fid(data->currfid);
        freqs.new = find_khz_freq_from_fid(fid);
-       for_each_cpu_mask(i, cpu_core_map[data->cpu]) {
+
+       for_each_cpu_mask(i, *(data->available_cores)) {
                freqs.cpu = i;
                cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
        }
 
        res = transition_fid_vid(data, fid, vid);
-
        freqs.new = find_khz_freq_from_fid(data->currfid);
-       for_each_cpu_mask(i, cpu_core_map[data->cpu]) {
+
+       for_each_cpu_mask(i, *(data->available_cores)) {
                freqs.cpu = i;
                cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
-        }
+       }
+       return res;
+}
+
+/* Take a frequency, and issue the hardware pstate transition command */
+static int transition_frequency_pstate(struct powernow_k8_data *data, unsigned int index)
+{
+       u32 fid = 0;
+       u32 did = 0;
+       u32 pstate = 0;
+       int res, i;
+       struct cpufreq_freqs freqs;
+
+       dprintk("cpu %d transition to index %u\n", smp_processor_id(), index);
+
+       /* get fid did for hardware pstate transition */
+       pstate = index & HW_PSTATE_MASK;
+       if (pstate > MAX_HW_PSTATE)
+               return 0;
+       fid = (index & HW_FID_INDEX_MASK) >> HW_FID_INDEX_SHIFT;
+       did = (index & HW_DID_INDEX_MASK) >> HW_DID_INDEX_SHIFT;
+       freqs.old = find_khz_freq_from_fiddid(data->currfid, data->currdid);
+       freqs.new = find_khz_freq_from_fiddid(fid, did);
+
+       for_each_cpu_mask(i, *(data->available_cores)) {
+               freqs.cpu = i;
+               cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
+       }
+
+       res = transition_pstate(data, pstate);
+       data->currfid = find_fid_from_pstate(pstate);
+       data->currdid = find_did_from_pstate(pstate);
+       freqs.new = find_khz_freq_from_fiddid(data->currfid, data->currdid);
+
+       for_each_cpu_mask(i, *(data->available_cores)) {
+               freqs.cpu = i;
+               cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
+       }
        return res;
 }
 
@@ -933,18 +1079,21 @@ static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsi
        dprintk("targ: cpu %d, %d kHz, min %d, max %d, relation %d\n",
                pol->cpu, targfreq, pol->min, pol->max, relation);
 
-       if (query_current_values_with_pending_wait(data)) {
-               ret = -EIO;
+       if (query_current_values_with_pending_wait(data))
                goto err_out;
-       }
 
-       dprintk("targ: curr fid 0x%x, vid 0x%x\n",
+       if (cpu_family)
+               dprintk("targ: curr fid 0x%x, did 0x%x\n",
+                       data->currfid, data->currvid);
+       else {
+               dprintk("targ: curr fid 0x%x, vid 0x%x\n",
                data->currfid, data->currvid);
 
-       if ((checkvid != data->currvid) || (checkfid != data->currfid)) {
-               printk(KERN_INFO PFX
-                       "error - out of sync, fix 0x%x 0x%x, vid 0x%x 0x%x\n",
-                       checkfid, data->currfid, checkvid, data->currvid);
+               if ((checkvid != data->currvid) || (checkfid != data->currfid)) {
+                       printk(KERN_INFO PFX
+                               "error - out of sync, fix 0x%x 0x%x, vid 0x%x 0x%x\n",
+                               checkfid, data->currfid, checkvid, data->currvid);
+               }
        }
 
        if (cpufreq_frequency_table_target(pol, data->powernow_table, targfreq, relation, &newstate))
@@ -954,7 +1103,11 @@ static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsi
 
        powernow_k8_acpi_pst_values(data, newstate);
 
-       if (transition_frequency(data, newstate)) {
+       if (cpu_family)
+               ret = transition_frequency_pstate(data, newstate);
+       else
+               ret = transition_frequency_fidvid(data, newstate);
+       if (ret) {
                printk(KERN_ERR PFX "transition frequency failed\n");
                ret = 1;
                mutex_unlock(&fidvid_mutex);
@@ -962,7 +1115,10 @@ static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsi
        }
        mutex_unlock(&fidvid_mutex);
 
-       pol->cur = find_khz_freq_from_fid(data->currfid);
+       if (cpu_family)
+               pol->cur = find_khz_freq_from_fiddid(data->currfid, data->currdid);
+       else
+               pol->cur = find_khz_freq_from_fid(data->currfid);
        ret = 0;
 
 err_out:
@@ -1007,14 +1163,13 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
                 * Use the PSB BIOS structure. This is only availabe on
                 * an UP version, and is deprecated by AMD.
                 */
-
                if ((num_online_cpus() != 1) || (num_possible_cpus() != 1)) {
                        printk(KERN_ERR PFX "MP systems not supported by PSB BIOS structure\n");
                        kfree(data);
                        return -ENODEV;
                }
                if (pol->cpu != 0) {
-                       printk(KERN_ERR PFX "init not cpu 0\n");
+                       printk(KERN_ERR PFX "No _PSS objects for CPU other than CPU0\n");
                        kfree(data);
                        return -ENODEV;
                }
@@ -1042,20 +1197,28 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
        if (query_current_values_with_pending_wait(data))
                goto err_out;
 
-       fidvid_msr_init();
+       if (!cpu_family)
+               fidvid_msr_init();
 
        /* run on any CPU again */
        set_cpus_allowed(current, oldmask);
 
        pol->governor = CPUFREQ_DEFAULT_GOVERNOR;
-       pol->cpus = cpu_core_map[pol->cpu];
+       if (cpu_family)
+               pol->cpus = cpumask_of_cpu(pol->cpu);
+       else
+               pol->cpus = cpu_core_map[pol->cpu];
+       data->available_cores = &(pol->cpus);
 
        /* Take a crude guess here.
         * That guess was in microseconds, so multiply with 1000 */
        pol->cpuinfo.transition_latency = (((data->rvo + 8) * data->vstable * VST_UNITS_20US)
            + (3 * (1 << data->irt) * 10)) * 1000;
 
-       pol->cur = find_khz_freq_from_fid(data->currfid);
+       if (cpu_family)
+               pol->cur = find_khz_freq_from_fiddid(data->currfid, data->currdid);
+       else
+               pol->cur = find_khz_freq_from_fid(data->currfid);
        dprintk("policy current frequency %d kHz\n", pol->cur);
 
        /* min/max the cpu is capable of */
@@ -1069,8 +1232,12 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
 
        cpufreq_frequency_table_get_attr(data->powernow_table, pol->cpu);
 
-       printk("cpu_init done, current fid 0x%x, vid 0x%x\n",
-              data->currfid, data->currvid);
+       if (cpu_family)
+               dprintk("cpu_init done, current fid 0x%x, did 0x%x\n",
+                       data->currfid, data->currdid);
+       else
+               dprintk("cpu_init done, current fid 0x%x, vid 0x%x\n",
+                       data->currfid, data->currvid);
 
        powernow_data[pol->cpu] = data;
 
@@ -1156,8 +1323,9 @@ static int __cpuinit powernowk8_init(void)
        }
 
        if (supported_cpus == num_online_cpus()) {
-               printk(KERN_INFO PFX "Found %d AMD Athlon 64 / Opteron "
-                       "processors (" VERSION ")\n", supported_cpus);
+               printk(KERN_INFO PFX "Found %d %s "
+                       "processors (" VERSION ")\n", supported_cpus,
+                       boot_cpu_data.x86_model_id);
                return cpufreq_register_driver(&cpufreq_amd64_driver);
        }
 
index 79a7c5c87edcfe602dad0382d767824b88473db2..bf8ad9e43da3e4a006f5720c30c900bfa5e97a5c 100644 (file)
@@ -1,5 +1,5 @@
 /*
- *  (c) 2003, 2004, 2005 Advanced Micro Devices, Inc.
+ *  (c) 2003-2006 Advanced Micro Devices, Inc.
  *  Your use of this code is subject to the terms and conditions of the
  *  GNU general public license version 2. See "COPYING" or
  *  http://www.gnu.org/licenses/gpl.html
@@ -21,8 +21,8 @@ struct powernow_k8_data {
        u32 plllock; /* pll lock time, units 1 us */
         u32 exttype; /* extended interface = 1 */
 
-       /* keep track of the current fid / vid */
-       u32 currvid, currfid;
+       /* keep track of the current fid / vid or did */
+       u32 currvid, currfid, currdid;
 
        /* the powernow_table includes all frequency and vid/fid pairings:
         * fid are the lower 8 bits of the index, vid are the upper 8 bits.
@@ -34,6 +34,10 @@ struct powernow_k8_data {
         * used to determine valid frequency/vid/fid states */
        struct acpi_processor_performance acpi_data;
 #endif
+       /* we need to keep track of associated cores, but let cpufreq
+        * handle hotplug events - so just point at cpufreq pol->cpus
+        * structure */
+       cpumask_t *available_cores;
 };
 
 
@@ -43,6 +47,7 @@ struct powernow_k8_data {
 #define CPUID_XFAM_K8                  0
 #define CPUID_XMOD                     0x000f0000      /* extended model */
 #define CPUID_XMOD_REV_G               0x00060000
+#define CPUID_XFAM_10H                         0x00100000      /* family 0x10 */
 #define CPUID_USE_XFAM_XMOD            0x00000f00
 #define CPUID_GET_MAX_CAPABILITIES     0x80000000
 #define CPUID_FREQ_VOLT_CAPABILITIES   0x80000007
@@ -79,6 +84,32 @@ struct powernow_k8_data {
 #define MSR_S_HI_CURRENT_VID      0x0000003f
 #define MSR_C_HI_STP_GNT_BENIGN          0x00000001
 
+
+/* Hardware Pstate _PSS and MSR definitions */
+#define USE_HW_PSTATE          0x00000080
+#define HW_PSTATE_FID_MASK     0x0000003f
+#define HW_PSTATE_DID_MASK     0x000001c0
+#define HW_PSTATE_DID_SHIFT    6
+#define HW_PSTATE_MASK                 0x00000007
+#define HW_PSTATE_VALID_MASK   0x80000000
+#define HW_FID_INDEX_SHIFT     8
+#define HW_FID_INDEX_MASK      0x0000ff00
+#define HW_DID_INDEX_SHIFT     16
+#define HW_DID_INDEX_MASK      0x00ff0000
+#define HW_WATTS_MASK          0xff
+#define HW_PWR_DVR_MASK                0x300
+#define HW_PWR_DVR_SHIFT       8
+#define HW_PWR_MAX_MULT                3
+#define MAX_HW_PSTATE          8       /* hw pstate supports up to 8 */
+#define MSR_PSTATE_DEF_BASE    0xc0010064 /* base of Pstate MSRs */
+#define MSR_PSTATE_STATUS      0xc0010063 /* Pstate Status MSR */
+#define MSR_PSTATE_CTRL        0xc0010062 /* Pstate control MSR */
+
+/* define the two driver architectures */
+#define CPU_OPTERON 0
+#define CPU_HW_PSTATE 1
+
+
 /*
  * There are restrictions frequencies have to follow:
  * - only 1 entry in the low fid table ( <=1.4GHz )
@@ -182,6 +213,9 @@ static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid);
 
 static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index);
 
+static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table);
+static int fill_powernow_table_fidvid(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table);
+
 #ifdef CONFIG_SMP
 static inline void define_siblings(int cpu, cpumask_t cpu_sharedcore_mask[])
 {
index b0ff9075708c4142d3f70eab9300547ca22f0666..ce54ff12c15d1b9c0c7df76065704ebfa5fa1232 100644 (file)
@@ -250,7 +250,7 @@ static int centrino_cpu_init_table(struct cpufreq_policy *policy)
 
        if (model->cpu_id == NULL) {
                /* No match at all */
-               dprintk(KERN_INFO PFX "no support for CPU model \"%s\": "
+               dprintk("no support for CPU model \"%s\": "
                       "send /proc/cpuinfo to " MAINTAINER "\n",
                       cpu->x86_model_id);
                return -ENOENT;
@@ -258,10 +258,10 @@ static int centrino_cpu_init_table(struct cpufreq_policy *policy)
 
        if (model->op_points == NULL) {
                /* Matched a non-match */
-               dprintk(KERN_INFO PFX "no table support for CPU model \"%s\"\n",
+               dprintk("no table support for CPU model \"%s\"\n",
                       cpu->x86_model_id);
 #ifndef CONFIG_X86_SPEEDSTEP_CENTRINO_ACPI
-               dprintk(KERN_INFO PFX "try compiling with CONFIG_X86_SPEEDSTEP_CENTRINO_ACPI enabled\n");
+               dprintk("try compiling with CONFIG_X86_SPEEDSTEP_CENTRINO_ACPI enabled\n");
 #endif
                return -ENOENT;
        }
@@ -368,7 +368,7 @@ static int centrino_cpu_init_acpi(struct cpufreq_policy *policy)
 
        /* register with ACPI core */
        if (acpi_processor_register_performance(&p, cpu)) {
-               dprintk(KERN_INFO PFX "obtaining ACPI data failed\n");
+               dprintk("obtaining ACPI data failed\n");
                return -EIO;
        }
 
@@ -465,7 +465,7 @@ static int centrino_cpu_init_acpi(struct cpufreq_policy *policy)
        kfree(centrino_model[cpu]);
  err_unreg:
        acpi_processor_unregister_performance(&p, cpu);
-       dprintk(KERN_INFO PFX "invalid ACPI data\n");
+       dprintk("invalid ACPI data\n");
        return (result);
 }
 #else
@@ -499,7 +499,7 @@ static int centrino_cpu_init(struct cpufreq_policy *policy)
                        centrino_cpu[policy->cpu] = &cpu_ids[i];
 
                if (!centrino_cpu[policy->cpu]) {
-                       dprintk(KERN_INFO PFX "found unsupported CPU with "
+                       dprintk("found unsupported CPU with "
                        "Enhanced SpeedStep: send /proc/cpuinfo to "
                        MAINTAINER "\n");
                        return -ENODEV;
index aeb5ab2391e421faf03935992ffa3c0c817f4353..8b11cebe65df5ad396f8fb975e158466f9dd859b 100644 (file)
@@ -72,4 +72,6 @@ source "drivers/edac/Kconfig"
 
 source "drivers/rtc/Kconfig"
 
+source "drivers/dma/Kconfig"
+
 endmenu
index 447d8e68887a1f668312806e8cedd75c8c218247..3c5170310bd0d407277fe5bb36a5cef87de8ba3c 100644 (file)
@@ -74,3 +74,4 @@ obj-$(CONFIG_SGI_SN)          += sn/
 obj-y                          += firmware/
 obj-$(CONFIG_CRYPTO)           += crypto/
 obj-$(CONFIG_SUPERH)           += sh/
+obj-$(CONFIG_DMA_ENGINE)       += dma/
index 07bc6dfe662b34aeb4c83d7476d6cee31c10c70a..8920e8c6e246dba49a30da6c2539dae11ec69316 100644 (file)
@@ -812,6 +812,9 @@ static int irqrouter_resume(struct sys_device *dev)
 
        ACPI_FUNCTION_TRACE("irqrouter_resume");
 
+       /* Make sure SCI is enabled again (Apple firmware bug?) */
+       acpi_set_register(ACPI_BITREG_SCI_ENABLE, 1, ACPI_MTX_DO_NOT_LOCK);
+
        acpi_in_resume = 1;
        list_for_each(node, &acpi_link.entries) {
                link = list_entry(node, struct acpi_pci_link, node);
index fdff774b8ab944421ad0190486e98cc4c5f458a5..c1434ed118808999a3b05f148ef3b76b0590360f 100644 (file)
@@ -116,8 +116,7 @@ aoenet_rcv(struct sk_buff *skb, struct net_device *ifp, struct packet_type *pt,
        skb = skb_share_check(skb, GFP_ATOMIC);
        if (skb == NULL)
                return 0;
-       if (skb_is_nonlinear(skb))
-       if (skb_linearize(skb, GFP_ATOMIC) < 0)
+       if (skb_linearize(skb))
                goto exit;
        if (!is_aoe_netif(ifp))
                goto exit;
index 2b5838e64751a3bc3cb0ba005deba67b88cbfdc9..b4e00a343da9162e559f305794ae31cc99842285 100644 (file)
@@ -46,12 +46,6 @@ struct vm_operations_struct alpha_core_agp_vm_ops = {
 };
 
 
-static int alpha_core_agp_nop(void)
-{
-       /* just return success */
-       return 0;
-}
-
 static int alpha_core_agp_fetch_size(void)
 {
        return alpha_core_agp_sizes[0].size;
@@ -120,6 +114,11 @@ static int alpha_core_agp_remove_memory(struct agp_memory *mem, off_t pg_start,
        return status;
 }
 
+static int alpha_core_agp_create_free_gatt_table(struct agp_bridge_data *a)
+{
+       return 0;
+}
+
 struct agp_bridge_driver alpha_core_agp_driver = {
        .owner                  = THIS_MODULE,
        .aperture_sizes         = alpha_core_agp_sizes,
@@ -135,8 +134,8 @@ struct agp_bridge_driver alpha_core_agp_driver = {
        .tlb_flush              = alpha_core_agp_tlbflush,
        .mask_memory            = agp_generic_mask_memory,
        .cache_flush            = global_cache_flush,
-       .create_gatt_table      = alpha_core_agp_nop,
-       .free_gatt_table        = alpha_core_agp_nop,
+       .create_gatt_table      = alpha_core_agp_create_free_gatt_table,
+       .free_gatt_table        = alpha_core_agp_create_free_gatt_table,
        .insert_memory          = alpha_core_agp_insert_memory,
        .remove_memory          = alpha_core_agp_remove_memory,
        .alloc_by_type          = agp_generic_alloc_by_type,
index 4e1891e2c035c97336534bdd9a8ec6b1214d6a37..a92ab53a13703bc4c7df95380613ab2062166503 100644 (file)
@@ -809,12 +809,10 @@ int agp_generic_create_gatt_table(struct agp_bridge_data *bridge)
                                case U32_APER_SIZE:
                                        bridge->current_size = A_IDX32(bridge);
                                        break;
-                                       /* This case will never really happen. */
+                               /* These cases will never really happen. */
                                case FIXED_APER_SIZE:
                                case LVL2_APER_SIZE:
                                default:
-                                       bridge->current_size =
-                                           bridge->current_size;
                                        break;
                                }
                                temp = bridge->current_size;
index bddcae54b16d08d4cfff5ab427c347c9b4f07476..61ac3809f997cbbb9932a8eb3f0428d63195a2d4 100644 (file)
@@ -736,7 +736,7 @@ static int intel_i915_remove_entries(struct agp_memory *mem,off_t pg_start,
 static int intel_i915_fetch_size(void)
 {
        struct aper_size_info_fixed *values;
-       u32 temp, offset = 0;
+       u32 temp, offset;
 
 #define I915_256MB_ADDRESS_MASK (1<<27)
 
index 9846defbddb4def2f2ba71a750d48036e5b6ceef..1de1b12043bf09f92db60863ce316780cf3422ed 100644 (file)
@@ -329,7 +329,7 @@ static int agp_uninorth_suspend(struct pci_dev *pdev)
        /* turn off AGP on the bridge */
        agp = pci_find_capability(pdev, PCI_CAP_ID_AGP);
        pci_read_config_dword(pdev, agp + PCI_AGP_COMMAND, &cmd);
-       bridge->dev_private_data = (void *)cmd;
+       bridge->dev_private_data = (void *)(long)cmd;
        if (cmd & PCI_AGP_COMMAND_AGP) {
                printk("uninorth-agp: disabling AGP on bridge %s\n",
                                pci_name(pdev));
@@ -351,7 +351,7 @@ static int agp_uninorth_resume(struct pci_dev *pdev)
        if (bridge == NULL)
                return -ENODEV;
 
-       command = (u32)bridge->dev_private_data;
+       command = (long)bridge->dev_private_data;
        bridge->dev_private_data = NULL;
        if (!(command & PCI_AGP_COMMAND_AGP))
                return 0;
index 9f2f00d82917a748fb021cffcbac4a274e1434a9..05f8ce2cfb4ab77a950a54371c8559b7fa17ef49 100644 (file)
@@ -127,7 +127,7 @@ void cn_queue_del_callback(struct cn_queue_dev *dev, struct cb_id *id)
 
        if (found) {
                cn_queue_free_callback(cbq);
-               atomic_dec_and_test(&dev->refcnt);
+               atomic_dec(&dev->refcnt);
        }
 }
 
index 29b2fa5534ae3221017a49517a5017e56c328b4b..44d1eca83a7250748bf27c637998c3c2973f9657 100644 (file)
@@ -257,7 +257,7 @@ void cpufreq_notify_transition(struct cpufreq_freqs *freqs, unsigned int state)
                if (!(cpufreq_driver->flags & CPUFREQ_CONST_LOOPS)) {
                        if ((policy) && (policy->cpu == freqs->cpu) &&
                            (policy->cur) && (policy->cur != freqs->old)) {
-                               dprintk(KERN_WARNING "Warning: CPU frequency is"
+                               dprintk("Warning: CPU frequency is"
                                        " %u, cpufreq assumed %u kHz.\n",
                                        freqs->old, policy->cur);
                                freqs->old = policy->cur;
@@ -874,7 +874,7 @@ static void cpufreq_out_of_sync(unsigned int cpu, unsigned int old_freq, unsigne
 {
        struct cpufreq_freqs freqs;
 
-       dprintk(KERN_WARNING "Warning: CPU frequency out of sync: cpufreq and timing "
+       dprintk("Warning: CPU frequency out of sync: cpufreq and timing "
               "core thinks of %u, is %u kHz.\n", old_freq, new_freq);
 
        freqs.cpu = cpu;
@@ -1006,7 +1006,7 @@ static int cpufreq_suspend(struct sys_device * sysdev, pm_message_t pmsg)
                struct cpufreq_freqs freqs;
 
                if (!(cpufreq_driver->flags & CPUFREQ_PM_NO_WARN))
-                       dprintk(KERN_DEBUG "Warning: CPU frequency is %u, "
+                       dprintk("Warning: CPU frequency is %u, "
                               "cpufreq assumed %u kHz.\n",
                               cur_freq, cpu_policy->cur);
 
@@ -1087,7 +1087,7 @@ static int cpufreq_resume(struct sys_device * sysdev)
                        struct cpufreq_freqs freqs;
 
                        if (!(cpufreq_driver->flags & CPUFREQ_PM_NO_WARN))
-                               dprintk(KERN_WARNING "Warning: CPU frequency"
+                               dprintk("Warning: CPU frequency"
                                       "is %u, cpufreq assumed %u kHz.\n",
                                       cur_freq, cpu_policy->cur);
 
index 9694b6ed3268edfe333214dbc71712c5f28e0fc8..c576c0b3f452629560fd2567a1bc5735011daf92 100644 (file)
@@ -74,7 +74,7 @@ static ssize_t
 show_total_trans(struct cpufreq_policy *policy, char *buf)
 {
        struct cpufreq_stats *stat = cpufreq_stats_table[policy->cpu];
-       if(!stat)
+       if (!stat)
                return 0;
        return sprintf(buf, "%d\n",
                        cpufreq_stats_table[stat->cpu]->total_trans);
@@ -86,7 +86,7 @@ show_time_in_state(struct cpufreq_policy *policy, char *buf)
        ssize_t len = 0;
        int i;
        struct cpufreq_stats *stat = cpufreq_stats_table[policy->cpu];
-       if(!stat)
+       if (!stat)
                return 0;
        cpufreq_stats_update(stat->cpu);
        for (i = 0; i < stat->state_num; i++) {
@@ -104,7 +104,7 @@ show_trans_table(struct cpufreq_policy *policy, char *buf)
        int i, j;
 
        struct cpufreq_stats *stat = cpufreq_stats_table[policy->cpu];
-       if(!stat)
+       if (!stat)
                return 0;
        cpufreq_stats_update(stat->cpu);
        len += snprintf(buf + len, PAGE_SIZE - len, "   From  :    To\n");
index a4818ce889195d81ba5e70e0a19582410e8ac716..551f4ccf87fd9eba99657b713250dd23c5723ad3 100644 (file)
@@ -20,7 +20,7 @@ int cpufreq_frequency_table_cpuinfo(struct cpufreq_policy *policy,
 {
        unsigned int min_freq = ~0;
        unsigned int max_freq = 0;
-       unsigned int i = 0;
+       unsigned int i;
 
        for (i=0; (table[i].frequency != CPUFREQ_TABLE_END); i++) {
                unsigned int freq = table[i].frequency;
@@ -51,7 +51,7 @@ int cpufreq_frequency_table_verify(struct cpufreq_policy *policy,
                                   struct cpufreq_frequency_table *table)
 {
        unsigned int next_larger = ~0;
-       unsigned int i = 0;
+       unsigned int i;
        unsigned int count = 0;
 
        dprintk("request for verification of policy (%u - %u kHz) for cpu %u\n", policy->min, policy->max, policy->cpu);
@@ -91,20 +91,24 @@ int cpufreq_frequency_table_target(struct cpufreq_policy *policy,
                                   unsigned int relation,
                                   unsigned int *index)
 {
-       struct cpufreq_frequency_table optimal = { .index = ~0, };
-       struct cpufreq_frequency_table suboptimal = { .index = ~0, };
+       struct cpufreq_frequency_table optimal = {
+               .index = ~0,
+               .frequency = 0,
+       };
+       struct cpufreq_frequency_table suboptimal = {
+               .index = ~0,
+               .frequency = 0,
+       };
        unsigned int i;
 
        dprintk("request for target %u kHz (relation: %u) for cpu %u\n", target_freq, relation, policy->cpu);
 
        switch (relation) {
        case CPUFREQ_RELATION_H:
-               optimal.frequency = 0;
                suboptimal.frequency = ~0;
                break;
        case CPUFREQ_RELATION_L:
                optimal.frequency = ~0;
-               suboptimal.frequency = 0;
                break;
        }
 
diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
new file mode 100644 (file)
index 0000000..30d021d
--- /dev/null
@@ -0,0 +1,34 @@
+#
+# DMA engine configuration
+#
+
+menu "DMA Engine support"
+
+config DMA_ENGINE
+       bool "Support for DMA engines"
+       ---help---
+         DMA engines offload copy operations from the CPU to dedicated
+         hardware, allowing the copies to happen asynchronously.
+
+comment "DMA Clients"
+
+config NET_DMA
+       bool "Network: TCP receive copy offload"
+       depends on DMA_ENGINE && NET
+       default y
+       ---help---
+         This enables the use of DMA engines in the network stack to
+         offload receive copy-to-user operations, freeing CPU cycles.
+         Since this is the main user of the DMA engine, it should be enabled;
+         say Y here.
+
+comment "DMA Devices"
+
+config INTEL_IOATDMA
+       tristate "Intel I/OAT DMA support"
+       depends on DMA_ENGINE && PCI
+       default m
+       ---help---
+         Enable support for the Intel(R) I/OAT DMA engine.
+
+endmenu
diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile
new file mode 100644 (file)
index 0000000..bdcfdbd
--- /dev/null
@@ -0,0 +1,3 @@
+obj-$(CONFIG_DMA_ENGINE) += dmaengine.o
+obj-$(CONFIG_NET_DMA) += iovlock.o
+obj-$(CONFIG_INTEL_IOATDMA) += ioatdma.o
diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
new file mode 100644 (file)
index 0000000..5829143
--- /dev/null
@@ -0,0 +1,408 @@
+/*
+ * Copyright(c) 2004 - 2006 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+ *
+ * The full GNU General Public License is included in this distribution in the
+ * file called COPYING.
+ */
+
+/*
+ * This code implements the DMA subsystem. It provides a HW-neutral interface
+ * for other kernel code to use asynchronous memory copy capabilities,
+ * if present, and allows different HW DMA drivers to register as providing
+ * this capability.
+ *
+ * Due to the fact we are accelerating what is already a relatively fast
+ * operation, the code goes to great lengths to avoid additional overhead,
+ * such as locking.
+ *
+ * LOCKING:
+ *
+ * The subsystem keeps two global lists, dma_device_list and dma_client_list.
+ * Both of these are protected by a mutex, dma_list_mutex.
+ *
+ * Each device has a channels list, which runs unlocked but is never modified
+ * once the device is registered, it's just setup by the driver.
+ *
+ * Each client has a channels list, it's only modified under the client->lock
+ * and in an RCU callback, so it's safe to read under rcu_read_lock().
+ *
+ * Each device has a kref, which is initialized to 1 when the device is
+ * registered. A kref_put is done for each class_device registered.  When the
+ * class_device is released, the coresponding kref_put is done in the release
+ * method. Every time one of the device's channels is allocated to a client,
+ * a kref_get occurs.  When the channel is freed, the coresponding kref_put
+ * happens. The device's release function does a completion, so
+ * unregister_device does a remove event, class_device_unregister, a kref_put
+ * for the first reference, then waits on the completion for all other
+ * references to finish.
+ *
+ * Each channel has an open-coded implementation of Rusty Russell's "bigref,"
+ * with a kref and a per_cpu local_t.  A single reference is set when on an
+ * ADDED event, and removed with a REMOVE event.  Net DMA client takes an
+ * extra reference per outstanding transaction.  The relase function does a
+ * kref_put on the device. -ChrisL
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/dmaengine.h>
+#include <linux/hardirq.h>
+#include <linux/spinlock.h>
+#include <linux/percpu.h>
+#include <linux/rcupdate.h>
+#include <linux/mutex.h>
+
+static DEFINE_MUTEX(dma_list_mutex);
+static LIST_HEAD(dma_device_list);
+static LIST_HEAD(dma_client_list);
+
+/* --- sysfs implementation --- */
+
+static ssize_t show_memcpy_count(struct class_device *cd, char *buf)
+{
+       struct dma_chan *chan = container_of(cd, struct dma_chan, class_dev);
+       unsigned long count = 0;
+       int i;
+
+       for_each_possible_cpu(i)
+               count += per_cpu_ptr(chan->local, i)->memcpy_count;
+
+       return sprintf(buf, "%lu\n", count);
+}
+
+static ssize_t show_bytes_transferred(struct class_device *cd, char *buf)
+{
+       struct dma_chan *chan = container_of(cd, struct dma_chan, class_dev);
+       unsigned long count = 0;
+       int i;
+
+       for_each_possible_cpu(i)
+               count += per_cpu_ptr(chan->local, i)->bytes_transferred;
+
+       return sprintf(buf, "%lu\n", count);
+}
+
+static ssize_t show_in_use(struct class_device *cd, char *buf)
+{
+       struct dma_chan *chan = container_of(cd, struct dma_chan, class_dev);
+
+       return sprintf(buf, "%d\n", (chan->client ? 1 : 0));
+}
+
+static struct class_device_attribute dma_class_attrs[] = {
+       __ATTR(memcpy_count, S_IRUGO, show_memcpy_count, NULL),
+       __ATTR(bytes_transferred, S_IRUGO, show_bytes_transferred, NULL),
+       __ATTR(in_use, S_IRUGO, show_in_use, NULL),
+       __ATTR_NULL
+};
+
+static void dma_async_device_cleanup(struct kref *kref);
+
+static void dma_class_dev_release(struct class_device *cd)
+{
+       struct dma_chan *chan = container_of(cd, struct dma_chan, class_dev);
+       kref_put(&chan->device->refcount, dma_async_device_cleanup);
+}
+
+static struct class dma_devclass = {
+       .name            = "dma",
+       .class_dev_attrs = dma_class_attrs,
+       .release = dma_class_dev_release,
+};
+
+/* --- client and device registration --- */
+
+/**
+ * dma_client_chan_alloc - try to allocate a channel to a client
+ * @client: &dma_client
+ *
+ * Called with dma_list_mutex held.
+ */
+static struct dma_chan *dma_client_chan_alloc(struct dma_client *client)
+{
+       struct dma_device *device;
+       struct dma_chan *chan;
+       unsigned long flags;
+       int desc;       /* allocated descriptor count */
+
+       /* Find a channel, any DMA engine will do */
+       list_for_each_entry(device, &dma_device_list, global_node) {
+               list_for_each_entry(chan, &device->channels, device_node) {
+                       if (chan->client)
+                               continue;
+
+                       desc = chan->device->device_alloc_chan_resources(chan);
+                       if (desc >= 0) {
+                               kref_get(&device->refcount);
+                               kref_init(&chan->refcount);
+                               chan->slow_ref = 0;
+                               INIT_RCU_HEAD(&chan->rcu);
+                               chan->client = client;
+                               spin_lock_irqsave(&client->lock, flags);
+                               list_add_tail_rcu(&chan->client_node,
+                                                 &client->channels);
+                               spin_unlock_irqrestore(&client->lock, flags);
+                               return chan;
+                       }
+               }
+       }
+
+       return NULL;
+}
+
+/**
+ * dma_client_chan_free - release a DMA channel
+ * @chan: &dma_chan
+ */
+void dma_chan_cleanup(struct kref *kref)
+{
+       struct dma_chan *chan = container_of(kref, struct dma_chan, refcount);
+       chan->device->device_free_chan_resources(chan);
+       chan->client = NULL;
+       kref_put(&chan->device->refcount, dma_async_device_cleanup);
+}
+
+static void dma_chan_free_rcu(struct rcu_head *rcu)
+{
+       struct dma_chan *chan = container_of(rcu, struct dma_chan, rcu);
+       int bias = 0x7FFFFFFF;
+       int i;
+       for_each_possible_cpu(i)
+               bias -= local_read(&per_cpu_ptr(chan->local, i)->refcount);
+       atomic_sub(bias, &chan->refcount.refcount);
+       kref_put(&chan->refcount, dma_chan_cleanup);
+}
+
+static void dma_client_chan_free(struct dma_chan *chan)
+{
+       atomic_add(0x7FFFFFFF, &chan->refcount.refcount);
+       chan->slow_ref = 1;
+       call_rcu(&chan->rcu, dma_chan_free_rcu);
+}
+
+/**
+ * dma_chans_rebalance - reallocate channels to clients
+ *
+ * When the number of DMA channel in the system changes,
+ * channels need to be rebalanced among clients
+ */
+static void dma_chans_rebalance(void)
+{
+       struct dma_client *client;
+       struct dma_chan *chan;
+       unsigned long flags;
+
+       mutex_lock(&dma_list_mutex);
+
+       list_for_each_entry(client, &dma_client_list, global_node) {
+               while (client->chans_desired > client->chan_count) {
+                       chan = dma_client_chan_alloc(client);
+                       if (!chan)
+                               break;
+                       client->chan_count++;
+                       client->event_callback(client,
+                                              chan,
+                                              DMA_RESOURCE_ADDED);
+               }
+               while (client->chans_desired < client->chan_count) {
+                       spin_lock_irqsave(&client->lock, flags);
+                       chan = list_entry(client->channels.next,
+                                         struct dma_chan,
+                                         client_node);
+                       list_del_rcu(&chan->client_node);
+                       spin_unlock_irqrestore(&client->lock, flags);
+                       client->chan_count--;
+                       client->event_callback(client,
+                                              chan,
+                                              DMA_RESOURCE_REMOVED);
+                       dma_client_chan_free(chan);
+               }
+       }
+
+       mutex_unlock(&dma_list_mutex);
+}
+
+/**
+ * dma_async_client_register - allocate and register a &dma_client
+ * @event_callback: callback for notification of channel addition/removal
+ */
+struct dma_client *dma_async_client_register(dma_event_callback event_callback)
+{
+       struct dma_client *client;
+
+       client = kzalloc(sizeof(*client), GFP_KERNEL);
+       if (!client)
+               return NULL;
+
+       INIT_LIST_HEAD(&client->channels);
+       spin_lock_init(&client->lock);
+       client->chans_desired = 0;
+       client->chan_count = 0;
+       client->event_callback = event_callback;
+
+       mutex_lock(&dma_list_mutex);
+       list_add_tail(&client->global_node, &dma_client_list);
+       mutex_unlock(&dma_list_mutex);
+
+       return client;
+}
+
+/**
+ * dma_async_client_unregister - unregister a client and free the &dma_client
+ * @client:
+ *
+ * Force frees any allocated DMA channels, frees the &dma_client memory
+ */
+void dma_async_client_unregister(struct dma_client *client)
+{
+       struct dma_chan *chan;
+
+       if (!client)
+               return;
+
+       rcu_read_lock();
+       list_for_each_entry_rcu(chan, &client->channels, client_node)
+               dma_client_chan_free(chan);
+       rcu_read_unlock();
+
+       mutex_lock(&dma_list_mutex);
+       list_del(&client->global_node);
+       mutex_unlock(&dma_list_mutex);
+
+       kfree(client);
+       dma_chans_rebalance();
+}
+
+/**
+ * dma_async_client_chan_request - request DMA channels
+ * @client: &dma_client
+ * @number: count of DMA channels requested
+ *
+ * Clients call dma_async_client_chan_request() to specify how many
+ * DMA channels they need, 0 to free all currently allocated.
+ * The resulting allocations/frees are indicated to the client via the
+ * event callback.
+ */
+void dma_async_client_chan_request(struct dma_client *client,
+                       unsigned int number)
+{
+       client->chans_desired = number;
+       dma_chans_rebalance();
+}
+
+/**
+ * dma_async_device_register -
+ * @device: &dma_device
+ */
+int dma_async_device_register(struct dma_device *device)
+{
+       static int id;
+       int chancnt = 0;
+       struct dma_chan* chan;
+
+       if (!device)
+               return -ENODEV;
+
+       init_completion(&device->done);
+       kref_init(&device->refcount);
+       device->dev_id = id++;
+
+       /* represent channels in sysfs. Probably want devs too */
+       list_for_each_entry(chan, &device->channels, device_node) {
+               chan->local = alloc_percpu(typeof(*chan->local));
+               if (chan->local == NULL)
+                       continue;
+
+               chan->chan_id = chancnt++;
+               chan->class_dev.class = &dma_devclass;
+               chan->class_dev.dev = NULL;
+               snprintf(chan->class_dev.class_id, BUS_ID_SIZE, "dma%dchan%d",
+                        device->dev_id, chan->chan_id);
+
+               kref_get(&device->refcount);
+               class_device_register(&chan->class_dev);
+       }
+
+       mutex_lock(&dma_list_mutex);
+       list_add_tail(&device->global_node, &dma_device_list);
+       mutex_unlock(&dma_list_mutex);
+
+       dma_chans_rebalance();
+
+       return 0;
+}
+
+/**
+ * dma_async_device_unregister -
+ * @device: &dma_device
+ */
+static void dma_async_device_cleanup(struct kref *kref)
+{
+       struct dma_device *device;
+
+       device = container_of(kref, struct dma_device, refcount);
+       complete(&device->done);
+}
+
+void dma_async_device_unregister(struct dma_device* device)
+{
+       struct dma_chan *chan;
+       unsigned long flags;
+
+       mutex_lock(&dma_list_mutex);
+       list_del(&device->global_node);
+       mutex_unlock(&dma_list_mutex);
+
+       list_for_each_entry(chan, &device->channels, device_node) {
+               if (chan->client) {
+                       spin_lock_irqsave(&chan->client->lock, flags);
+                       list_del(&chan->client_node);
+                       chan->client->chan_count--;
+                       spin_unlock_irqrestore(&chan->client->lock, flags);
+                       chan->client->event_callback(chan->client,
+                                                    chan,
+                                                    DMA_RESOURCE_REMOVED);
+                       dma_client_chan_free(chan);
+               }
+               class_device_unregister(&chan->class_dev);
+       }
+       dma_chans_rebalance();
+
+       kref_put(&device->refcount, dma_async_device_cleanup);
+       wait_for_completion(&device->done);
+}
+
+static int __init dma_bus_init(void)
+{
+       mutex_init(&dma_list_mutex);
+       return class_register(&dma_devclass);
+}
+
+subsys_initcall(dma_bus_init);
+
+EXPORT_SYMBOL(dma_async_client_register);
+EXPORT_SYMBOL(dma_async_client_unregister);
+EXPORT_SYMBOL(dma_async_client_chan_request);
+EXPORT_SYMBOL(dma_async_memcpy_buf_to_buf);
+EXPORT_SYMBOL(dma_async_memcpy_buf_to_pg);
+EXPORT_SYMBOL(dma_async_memcpy_pg_to_pg);
+EXPORT_SYMBOL(dma_async_memcpy_complete);
+EXPORT_SYMBOL(dma_async_memcpy_issue_pending);
+EXPORT_SYMBOL(dma_async_device_register);
+EXPORT_SYMBOL(dma_async_device_unregister);
+EXPORT_SYMBOL(dma_chan_cleanup);
diff --git a/drivers/dma/ioatdma.c b/drivers/dma/ioatdma.c
new file mode 100644 (file)
index 0000000..0fdf7fb
--- /dev/null
@@ -0,0 +1,840 @@
+/*
+ * Copyright(c) 2004 - 2006 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+ *
+ * The full GNU General Public License is included in this distribution in the
+ * file called COPYING.
+ */
+
+/*
+ * This driver supports an Intel I/OAT DMA engine, which does asynchronous
+ * copy operations.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/interrupt.h>
+#include <linux/dmaengine.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include "ioatdma.h"
+#include "ioatdma_io.h"
+#include "ioatdma_registers.h"
+#include "ioatdma_hw.h"
+
+#define to_ioat_chan(chan) container_of(chan, struct ioat_dma_chan, common)
+#define to_ioat_device(dev) container_of(dev, struct ioat_device, common)
+#define to_ioat_desc(lh) container_of(lh, struct ioat_desc_sw, node)
+
+/* internal functions */
+static int __devinit ioat_probe(struct pci_dev *pdev, const struct pci_device_id *ent);
+static void __devexit ioat_remove(struct pci_dev *pdev);
+
+static int enumerate_dma_channels(struct ioat_device *device)
+{
+       u8 xfercap_scale;
+       u32 xfercap;
+       int i;
+       struct ioat_dma_chan *ioat_chan;
+
+       device->common.chancnt = ioatdma_read8(device, IOAT_CHANCNT_OFFSET);
+       xfercap_scale = ioatdma_read8(device, IOAT_XFERCAP_OFFSET);
+       xfercap = (xfercap_scale == 0 ? -1 : (1UL << xfercap_scale));
+
+       for (i = 0; i < device->common.chancnt; i++) {
+               ioat_chan = kzalloc(sizeof(*ioat_chan), GFP_KERNEL);
+               if (!ioat_chan) {
+                       device->common.chancnt = i;
+                       break;
+               }
+
+               ioat_chan->device = device;
+               ioat_chan->reg_base = device->reg_base + (0x80 * (i + 1));
+               ioat_chan->xfercap = xfercap;
+               spin_lock_init(&ioat_chan->cleanup_lock);
+               spin_lock_init(&ioat_chan->desc_lock);
+               INIT_LIST_HEAD(&ioat_chan->free_desc);
+               INIT_LIST_HEAD(&ioat_chan->used_desc);
+               /* This should be made common somewhere in dmaengine.c */
+               ioat_chan->common.device = &device->common;
+               ioat_chan->common.client = NULL;
+               list_add_tail(&ioat_chan->common.device_node,
+                             &device->common.channels);
+       }
+       return device->common.chancnt;
+}
+
+static struct ioat_desc_sw *ioat_dma_alloc_descriptor(
+       struct ioat_dma_chan *ioat_chan,
+       int flags)
+{
+       struct ioat_dma_descriptor *desc;
+       struct ioat_desc_sw *desc_sw;
+       struct ioat_device *ioat_device;
+       dma_addr_t phys;
+
+       ioat_device = to_ioat_device(ioat_chan->common.device);
+       desc = pci_pool_alloc(ioat_device->dma_pool, flags, &phys);
+       if (unlikely(!desc))
+               return NULL;
+
+       desc_sw = kzalloc(sizeof(*desc_sw), flags);
+       if (unlikely(!desc_sw)) {
+               pci_pool_free(ioat_device->dma_pool, desc, phys);
+               return NULL;
+       }
+
+       memset(desc, 0, sizeof(*desc));
+       desc_sw->hw = desc;
+       desc_sw->phys = phys;
+
+       return desc_sw;
+}
+
+#define INITIAL_IOAT_DESC_COUNT 128
+
+static void ioat_start_null_desc(struct ioat_dma_chan *ioat_chan);
+
+/* returns the actual number of allocated descriptors */
+static int ioat_dma_alloc_chan_resources(struct dma_chan *chan)
+{
+       struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
+       struct ioat_desc_sw *desc = NULL;
+       u16 chanctrl;
+       u32 chanerr;
+       int i;
+       LIST_HEAD(tmp_list);
+
+       /*
+        * In-use bit automatically set by reading chanctrl
+        * If 0, we got it, if 1, someone else did
+        */
+       chanctrl = ioatdma_chan_read16(ioat_chan, IOAT_CHANCTRL_OFFSET);
+       if (chanctrl & IOAT_CHANCTRL_CHANNEL_IN_USE)
+               return -EBUSY;
+
+        /* Setup register to interrupt and write completion status on error */
+       chanctrl = IOAT_CHANCTRL_CHANNEL_IN_USE |
+               IOAT_CHANCTRL_ERR_INT_EN |
+               IOAT_CHANCTRL_ANY_ERR_ABORT_EN |
+               IOAT_CHANCTRL_ERR_COMPLETION_EN;
+        ioatdma_chan_write16(ioat_chan, IOAT_CHANCTRL_OFFSET, chanctrl);
+
+       chanerr = ioatdma_chan_read32(ioat_chan, IOAT_CHANERR_OFFSET);
+       if (chanerr) {
+               printk("IOAT: CHANERR = %x, clearing\n", chanerr);
+               ioatdma_chan_write32(ioat_chan, IOAT_CHANERR_OFFSET, chanerr);
+       }
+
+       /* Allocate descriptors */
+       for (i = 0; i < INITIAL_IOAT_DESC_COUNT; i++) {
+               desc = ioat_dma_alloc_descriptor(ioat_chan, GFP_KERNEL);
+               if (!desc) {
+                       printk(KERN_ERR "IOAT: Only %d initial descriptors\n", i);
+                       break;
+               }
+               list_add_tail(&desc->node, &tmp_list);
+       }
+       spin_lock_bh(&ioat_chan->desc_lock);
+       list_splice(&tmp_list, &ioat_chan->free_desc);
+       spin_unlock_bh(&ioat_chan->desc_lock);
+
+       /* allocate a completion writeback area */
+       /* doing 2 32bit writes to mmio since 1 64b write doesn't work */
+       ioat_chan->completion_virt =
+               pci_pool_alloc(ioat_chan->device->completion_pool,
+                              GFP_KERNEL,
+                              &ioat_chan->completion_addr);
+       memset(ioat_chan->completion_virt, 0,
+              sizeof(*ioat_chan->completion_virt));
+       ioatdma_chan_write32(ioat_chan, IOAT_CHANCMP_OFFSET_LOW,
+                      ((u64) ioat_chan->completion_addr) & 0x00000000FFFFFFFF);
+       ioatdma_chan_write32(ioat_chan, IOAT_CHANCMP_OFFSET_HIGH,
+                      ((u64) ioat_chan->completion_addr) >> 32);
+
+       ioat_start_null_desc(ioat_chan);
+       return i;
+}
+
+static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat_chan);
+
+static void ioat_dma_free_chan_resources(struct dma_chan *chan)
+{
+       struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
+       struct ioat_device *ioat_device = to_ioat_device(chan->device);
+       struct ioat_desc_sw *desc, *_desc;
+       u16 chanctrl;
+       int in_use_descs = 0;
+
+       ioat_dma_memcpy_cleanup(ioat_chan);
+
+       ioatdma_chan_write8(ioat_chan, IOAT_CHANCMD_OFFSET, IOAT_CHANCMD_RESET);
+
+       spin_lock_bh(&ioat_chan->desc_lock);
+       list_for_each_entry_safe(desc, _desc, &ioat_chan->used_desc, node) {
+               in_use_descs++;
+               list_del(&desc->node);
+               pci_pool_free(ioat_device->dma_pool, desc->hw, desc->phys);
+               kfree(desc);
+       }
+       list_for_each_entry_safe(desc, _desc, &ioat_chan->free_desc, node) {
+               list_del(&desc->node);
+               pci_pool_free(ioat_device->dma_pool, desc->hw, desc->phys);
+               kfree(desc);
+       }
+       spin_unlock_bh(&ioat_chan->desc_lock);
+
+       pci_pool_free(ioat_device->completion_pool,
+                     ioat_chan->completion_virt,
+                     ioat_chan->completion_addr);
+
+       /* one is ok since we left it on there on purpose */
+       if (in_use_descs > 1)
+               printk(KERN_ERR "IOAT: Freeing %d in use descriptors!\n",
+                       in_use_descs - 1);
+
+       ioat_chan->last_completion = ioat_chan->completion_addr = 0;
+
+       /* Tell hw the chan is free */
+       chanctrl = ioatdma_chan_read16(ioat_chan, IOAT_CHANCTRL_OFFSET);
+       chanctrl &= ~IOAT_CHANCTRL_CHANNEL_IN_USE;
+       ioatdma_chan_write16(ioat_chan, IOAT_CHANCTRL_OFFSET, chanctrl);
+}
+
+/**
+ * do_ioat_dma_memcpy - actual function that initiates a IOAT DMA transaction
+ * @chan: IOAT DMA channel handle
+ * @dest: DMA destination address
+ * @src: DMA source address
+ * @len: transaction length in bytes
+ */
+
+static dma_cookie_t do_ioat_dma_memcpy(struct ioat_dma_chan *ioat_chan,
+                                       dma_addr_t dest,
+                                       dma_addr_t src,
+                                       size_t len)
+{
+       struct ioat_desc_sw *first;
+       struct ioat_desc_sw *prev;
+       struct ioat_desc_sw *new;
+       dma_cookie_t cookie;
+       LIST_HEAD(new_chain);
+       u32 copy;
+       size_t orig_len;
+       dma_addr_t orig_src, orig_dst;
+       unsigned int desc_count = 0;
+       unsigned int append = 0;
+
+       if (!ioat_chan || !dest || !src)
+               return -EFAULT;
+
+       if (!len)
+               return ioat_chan->common.cookie;
+
+       orig_len = len;
+       orig_src = src;
+       orig_dst = dest;
+
+       first = NULL;
+       prev = NULL;
+
+       spin_lock_bh(&ioat_chan->desc_lock);
+
+       while (len) {
+               if (!list_empty(&ioat_chan->free_desc)) {
+                       new = to_ioat_desc(ioat_chan->free_desc.next);
+                       list_del(&new->node);
+               } else {
+                       /* try to get another desc */
+                       new = ioat_dma_alloc_descriptor(ioat_chan, GFP_ATOMIC);
+                       /* will this ever happen? */
+                       /* TODO add upper limit on these */
+                       BUG_ON(!new);
+               }
+
+               copy = min((u32) len, ioat_chan->xfercap);
+
+               new->hw->size = copy;
+               new->hw->ctl = 0;
+               new->hw->src_addr = src;
+               new->hw->dst_addr = dest;
+               new->cookie = 0;
+
+               /* chain together the physical address list for the HW */
+               if (!first)
+                       first = new;
+               else
+                       prev->hw->next = (u64) new->phys;
+
+               prev = new;
+
+               len  -= copy;
+               dest += copy;
+               src  += copy;
+
+               list_add_tail(&new->node, &new_chain);
+               desc_count++;
+       }
+       new->hw->ctl = IOAT_DMA_DESCRIPTOR_CTL_CP_STS;
+       new->hw->next = 0;
+
+       /* cookie incr and addition to used_list must be atomic */
+
+       cookie = ioat_chan->common.cookie;
+       cookie++;
+       if (cookie < 0)
+               cookie = 1;
+       ioat_chan->common.cookie = new->cookie = cookie;
+
+       pci_unmap_addr_set(new, src, orig_src);
+       pci_unmap_addr_set(new, dst, orig_dst);
+       pci_unmap_len_set(new, src_len, orig_len);
+       pci_unmap_len_set(new, dst_len, orig_len);
+
+       /* write address into NextDescriptor field of last desc in chain */
+       to_ioat_desc(ioat_chan->used_desc.prev)->hw->next = first->phys;
+       list_splice_init(&new_chain, ioat_chan->used_desc.prev);
+
+       ioat_chan->pending += desc_count;
+       if (ioat_chan->pending >= 20) {
+               append = 1;
+               ioat_chan->pending = 0;
+       }
+
+       spin_unlock_bh(&ioat_chan->desc_lock);
+
+       if (append)
+               ioatdma_chan_write8(ioat_chan,
+                                   IOAT_CHANCMD_OFFSET,
+                                   IOAT_CHANCMD_APPEND);
+       return cookie;
+}
+
+/**
+ * ioat_dma_memcpy_buf_to_buf - wrapper that takes src & dest bufs
+ * @chan: IOAT DMA channel handle
+ * @dest: DMA destination address
+ * @src: DMA source address
+ * @len: transaction length in bytes
+ */
+
+static dma_cookie_t ioat_dma_memcpy_buf_to_buf(struct dma_chan *chan,
+                                               void *dest,
+                                               void *src,
+                                               size_t len)
+{
+       dma_addr_t dest_addr;
+       dma_addr_t src_addr;
+       struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
+
+       dest_addr = pci_map_single(ioat_chan->device->pdev,
+               dest, len, PCI_DMA_FROMDEVICE);
+       src_addr = pci_map_single(ioat_chan->device->pdev,
+               src, len, PCI_DMA_TODEVICE);
+
+       return do_ioat_dma_memcpy(ioat_chan, dest_addr, src_addr, len);
+}
+
+/**
+ * ioat_dma_memcpy_buf_to_pg - wrapper, copying from a buf to a page
+ * @chan: IOAT DMA channel handle
+ * @page: pointer to the page to copy to
+ * @offset: offset into that page
+ * @src: DMA source address
+ * @len: transaction length in bytes
+ */
+
+static dma_cookie_t ioat_dma_memcpy_buf_to_pg(struct dma_chan *chan,
+                                              struct page *page,
+                                              unsigned int offset,
+                                              void *src,
+                                              size_t len)
+{
+       dma_addr_t dest_addr;
+       dma_addr_t src_addr;
+       struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
+
+       dest_addr = pci_map_page(ioat_chan->device->pdev,
+               page, offset, len, PCI_DMA_FROMDEVICE);
+       src_addr = pci_map_single(ioat_chan->device->pdev,
+               src, len, PCI_DMA_TODEVICE);
+
+       return do_ioat_dma_memcpy(ioat_chan, dest_addr, src_addr, len);
+}
+
+/**
+ * ioat_dma_memcpy_pg_to_pg - wrapper, copying between two pages
+ * @chan: IOAT DMA channel handle
+ * @dest_pg: pointer to the page to copy to
+ * @dest_off: offset into that page
+ * @src_pg: pointer to the page to copy from
+ * @src_off: offset into that page
+ * @len: transaction length in bytes. This is guaranteed to not make a copy
+ *      across a page boundary.
+ */
+
+static dma_cookie_t ioat_dma_memcpy_pg_to_pg(struct dma_chan *chan,
+                                             struct page *dest_pg,
+                                             unsigned int dest_off,
+                                             struct page *src_pg,
+                                             unsigned int src_off,
+                                             size_t len)
+{
+       dma_addr_t dest_addr;
+       dma_addr_t src_addr;
+       struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
+
+       dest_addr = pci_map_page(ioat_chan->device->pdev,
+               dest_pg, dest_off, len, PCI_DMA_FROMDEVICE);
+       src_addr = pci_map_page(ioat_chan->device->pdev,
+               src_pg, src_off, len, PCI_DMA_TODEVICE);
+
+       return do_ioat_dma_memcpy(ioat_chan, dest_addr, src_addr, len);
+}
+
+/**
+ * ioat_dma_memcpy_issue_pending - push potentially unrecognoized appended descriptors to hw
+ * @chan: DMA channel handle
+ */
+
+static void ioat_dma_memcpy_issue_pending(struct dma_chan *chan)
+{
+       struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
+
+       if (ioat_chan->pending != 0) {
+               ioat_chan->pending = 0;
+               ioatdma_chan_write8(ioat_chan,
+                                   IOAT_CHANCMD_OFFSET,
+                                   IOAT_CHANCMD_APPEND);
+       }
+}
+
+static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *chan)
+{
+       unsigned long phys_complete;
+       struct ioat_desc_sw *desc, *_desc;
+       dma_cookie_t cookie = 0;
+
+       prefetch(chan->completion_virt);
+
+       if (!spin_trylock(&chan->cleanup_lock))
+               return;
+
+       /* The completion writeback can happen at any time,
+          so reads by the driver need to be atomic operations
+          The descriptor physical addresses are limited to 32-bits
+          when the CPU can only do a 32-bit mov */
+
+#if (BITS_PER_LONG == 64)
+       phys_complete =
+       chan->completion_virt->full & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR;
+#else
+       phys_complete = chan->completion_virt->low & IOAT_LOW_COMPLETION_MASK;
+#endif
+
+       if ((chan->completion_virt->full & IOAT_CHANSTS_DMA_TRANSFER_STATUS) ==
+               IOAT_CHANSTS_DMA_TRANSFER_STATUS_HALTED) {
+               printk("IOAT: Channel halted, chanerr = %x\n",
+                       ioatdma_chan_read32(chan, IOAT_CHANERR_OFFSET));
+
+               /* TODO do something to salvage the situation */
+       }
+
+       if (phys_complete == chan->last_completion) {
+               spin_unlock(&chan->cleanup_lock);
+               return;
+       }
+
+       spin_lock_bh(&chan->desc_lock);
+       list_for_each_entry_safe(desc, _desc, &chan->used_desc, node) {
+
+               /*
+                * Incoming DMA requests may use multiple descriptors, due to
+                * exceeding xfercap, perhaps. If so, only the last one will
+                * have a cookie, and require unmapping.
+                */
+               if (desc->cookie) {
+                       cookie = desc->cookie;
+
+                       /* yes we are unmapping both _page and _single alloc'd
+                          regions with unmap_page. Is this *really* that bad?
+                       */
+                       pci_unmap_page(chan->device->pdev,
+                                       pci_unmap_addr(desc, dst),
+                                       pci_unmap_len(desc, dst_len),
+                                       PCI_DMA_FROMDEVICE);
+                       pci_unmap_page(chan->device->pdev,
+                                       pci_unmap_addr(desc, src),
+                                       pci_unmap_len(desc, src_len),
+                                       PCI_DMA_TODEVICE);
+               }
+
+               if (desc->phys != phys_complete) {
+                       /* a completed entry, but not the last, so cleanup */
+                       list_del(&desc->node);
+                       list_add_tail(&desc->node, &chan->free_desc);
+               } else {
+                       /* last used desc. Do not remove, so we can append from
+                          it, but don't look at it next time, either */
+                       desc->cookie = 0;
+
+                       /* TODO check status bits? */
+                       break;
+               }
+       }
+
+       spin_unlock_bh(&chan->desc_lock);
+
+       chan->last_completion = phys_complete;
+       if (cookie != 0)
+               chan->completed_cookie = cookie;
+
+       spin_unlock(&chan->cleanup_lock);
+}
+
+/**
+ * ioat_dma_is_complete - poll the status of a IOAT DMA transaction
+ * @chan: IOAT DMA channel handle
+ * @cookie: DMA transaction identifier
+ */
+
+static enum dma_status ioat_dma_is_complete(struct dma_chan *chan,
+                                            dma_cookie_t cookie,
+                                            dma_cookie_t *done,
+                                            dma_cookie_t *used)
+{
+       struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
+       dma_cookie_t last_used;
+       dma_cookie_t last_complete;
+       enum dma_status ret;
+
+       last_used = chan->cookie;
+       last_complete = ioat_chan->completed_cookie;
+
+       if (done)
+               *done= last_complete;
+       if (used)
+               *used = last_used;
+
+       ret = dma_async_is_complete(cookie, last_complete, last_used);
+       if (ret == DMA_SUCCESS)
+               return ret;
+
+       ioat_dma_memcpy_cleanup(ioat_chan);
+
+       last_used = chan->cookie;
+       last_complete = ioat_chan->completed_cookie;
+
+       if (done)
+               *done= last_complete;
+       if (used)
+               *used = last_used;
+
+       return dma_async_is_complete(cookie, last_complete, last_used);
+}
+
+/* PCI API */
+
+static struct pci_device_id ioat_pci_tbl[] = {
+       { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT) },
+       { 0, }
+};
+
+static struct pci_driver ioat_pci_drv = {
+       .name   = "ioatdma",
+       .id_table = ioat_pci_tbl,
+       .probe  = ioat_probe,
+       .remove = __devexit_p(ioat_remove),
+};
+
+static irqreturn_t ioat_do_interrupt(int irq, void *data, struct pt_regs *regs)
+{
+       struct ioat_device *instance = data;
+       unsigned long attnstatus;
+       u8 intrctrl;
+
+       intrctrl = ioatdma_read8(instance, IOAT_INTRCTRL_OFFSET);
+
+       if (!(intrctrl & IOAT_INTRCTRL_MASTER_INT_EN))
+               return IRQ_NONE;
+
+       if (!(intrctrl & IOAT_INTRCTRL_INT_STATUS)) {
+               ioatdma_write8(instance, IOAT_INTRCTRL_OFFSET, intrctrl);
+               return IRQ_NONE;
+       }
+
+       attnstatus = ioatdma_read32(instance, IOAT_ATTNSTATUS_OFFSET);
+
+       printk(KERN_ERR "ioatdma error: interrupt! status %lx\n", attnstatus);
+
+       ioatdma_write8(instance, IOAT_INTRCTRL_OFFSET, intrctrl);
+       return IRQ_HANDLED;
+}
+
+static void ioat_start_null_desc(struct ioat_dma_chan *ioat_chan)
+{
+       struct ioat_desc_sw *desc;
+
+       spin_lock_bh(&ioat_chan->desc_lock);
+
+       if (!list_empty(&ioat_chan->free_desc)) {
+               desc = to_ioat_desc(ioat_chan->free_desc.next);
+               list_del(&desc->node);
+       } else {
+               /* try to get another desc */
+               spin_unlock_bh(&ioat_chan->desc_lock);
+               desc = ioat_dma_alloc_descriptor(ioat_chan, GFP_KERNEL);
+               spin_lock_bh(&ioat_chan->desc_lock);
+               /* will this ever happen? */
+               BUG_ON(!desc);
+       }
+
+       desc->hw->ctl = IOAT_DMA_DESCRIPTOR_NUL;
+       desc->hw->next = 0;
+
+       list_add_tail(&desc->node, &ioat_chan->used_desc);
+       spin_unlock_bh(&ioat_chan->desc_lock);
+
+#if (BITS_PER_LONG == 64)
+       ioatdma_chan_write64(ioat_chan, IOAT_CHAINADDR_OFFSET, desc->phys);
+#else
+       ioatdma_chan_write32(ioat_chan,
+                            IOAT_CHAINADDR_OFFSET_LOW,
+                            (u32) desc->phys);
+       ioatdma_chan_write32(ioat_chan, IOAT_CHAINADDR_OFFSET_HIGH, 0);
+#endif
+       ioatdma_chan_write8(ioat_chan, IOAT_CHANCMD_OFFSET, IOAT_CHANCMD_START);
+}
+
+/*
+ * Perform a IOAT transaction to verify the HW works.
+ */
+#define IOAT_TEST_SIZE 2000
+
+static int ioat_self_test(struct ioat_device *device)
+{
+       int i;
+       u8 *src;
+       u8 *dest;
+       struct dma_chan *dma_chan;
+       dma_cookie_t cookie;
+       int err = 0;
+
+       src = kzalloc(sizeof(u8) * IOAT_TEST_SIZE, SLAB_KERNEL);
+       if (!src)
+               return -ENOMEM;
+       dest = kzalloc(sizeof(u8) * IOAT_TEST_SIZE, SLAB_KERNEL);
+       if (!dest) {
+               kfree(src);
+               return -ENOMEM;
+       }
+
+       /* Fill in src buffer */
+       for (i = 0; i < IOAT_TEST_SIZE; i++)
+               src[i] = (u8)i;
+
+       /* Start copy, using first DMA channel */
+       dma_chan = container_of(device->common.channels.next,
+                               struct dma_chan,
+                               device_node);
+       if (ioat_dma_alloc_chan_resources(dma_chan) < 1) {
+               err = -ENODEV;
+               goto out;
+       }
+
+       cookie = ioat_dma_memcpy_buf_to_buf(dma_chan, dest, src, IOAT_TEST_SIZE);
+       ioat_dma_memcpy_issue_pending(dma_chan);
+       msleep(1);
+
+       if (ioat_dma_is_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) {
+               printk(KERN_ERR "ioatdma: Self-test copy timed out, disabling\n");
+               err = -ENODEV;
+               goto free_resources;
+       }
+       if (memcmp(src, dest, IOAT_TEST_SIZE)) {
+               printk(KERN_ERR "ioatdma: Self-test copy failed compare, disabling\n");
+               err = -ENODEV;
+               goto free_resources;
+       }
+
+free_resources:
+       ioat_dma_free_chan_resources(dma_chan);
+out:
+       kfree(src);
+       kfree(dest);
+       return err;
+}
+
+static int __devinit ioat_probe(struct pci_dev *pdev,
+                                const struct pci_device_id *ent)
+{
+       int err;
+       unsigned long mmio_start, mmio_len;
+       void *reg_base;
+       struct ioat_device *device;
+
+       err = pci_enable_device(pdev);
+       if (err)
+               goto err_enable_device;
+
+       err = pci_set_dma_mask(pdev, DMA_64BIT_MASK);
+       if (err)
+               err = pci_set_dma_mask(pdev, DMA_32BIT_MASK);
+       if (err)
+               goto err_set_dma_mask;
+
+       err = pci_request_regions(pdev, ioat_pci_drv.name);
+       if (err)
+               goto err_request_regions;
+
+       mmio_start = pci_resource_start(pdev, 0);
+       mmio_len = pci_resource_len(pdev, 0);
+
+       reg_base = ioremap(mmio_start, mmio_len);
+       if (!reg_base) {
+               err = -ENOMEM;
+               goto err_ioremap;
+       }
+
+       device = kzalloc(sizeof(*device), GFP_KERNEL);
+       if (!device) {
+               err = -ENOMEM;
+               goto err_kzalloc;
+       }
+
+       /* DMA coherent memory pool for DMA descriptor allocations */
+       device->dma_pool = pci_pool_create("dma_desc_pool", pdev,
+               sizeof(struct ioat_dma_descriptor), 64, 0);
+       if (!device->dma_pool) {
+               err = -ENOMEM;
+               goto err_dma_pool;
+       }
+
+       device->completion_pool = pci_pool_create("completion_pool", pdev, sizeof(u64), SMP_CACHE_BYTES, SMP_CACHE_BYTES);
+       if (!device->completion_pool) {
+               err = -ENOMEM;
+               goto err_completion_pool;
+       }
+
+       device->pdev = pdev;
+       pci_set_drvdata(pdev, device);
+#ifdef CONFIG_PCI_MSI
+       if (pci_enable_msi(pdev) == 0) {
+               device->msi = 1;
+       } else {
+               device->msi = 0;
+       }
+#endif
+       err = request_irq(pdev->irq, &ioat_do_interrupt, SA_SHIRQ, "ioat",
+               device);
+       if (err)
+               goto err_irq;
+
+       device->reg_base = reg_base;
+
+       ioatdma_write8(device, IOAT_INTRCTRL_OFFSET, IOAT_INTRCTRL_MASTER_INT_EN);
+       pci_set_master(pdev);
+
+       INIT_LIST_HEAD(&device->common.channels);
+       enumerate_dma_channels(device);
+
+       device->common.device_alloc_chan_resources = ioat_dma_alloc_chan_resources;
+       device->common.device_free_chan_resources = ioat_dma_free_chan_resources;
+       device->common.device_memcpy_buf_to_buf = ioat_dma_memcpy_buf_to_buf;
+       device->common.device_memcpy_buf_to_pg = ioat_dma_memcpy_buf_to_pg;
+       device->common.device_memcpy_pg_to_pg = ioat_dma_memcpy_pg_to_pg;
+       device->common.device_memcpy_complete = ioat_dma_is_complete;
+       device->common.device_memcpy_issue_pending = ioat_dma_memcpy_issue_pending;
+       printk(KERN_INFO "Intel(R) I/OAT DMA Engine found, %d channels\n",
+               device->common.chancnt);
+
+       err = ioat_self_test(device);
+       if (err)
+               goto err_self_test;
+
+       dma_async_device_register(&device->common);
+
+       return 0;
+
+err_self_test:
+err_irq:
+       pci_pool_destroy(device->completion_pool);
+err_completion_pool:
+       pci_pool_destroy(device->dma_pool);
+err_dma_pool:
+       kfree(device);
+err_kzalloc:
+       iounmap(reg_base);
+err_ioremap:
+       pci_release_regions(pdev);
+err_request_regions:
+err_set_dma_mask:
+       pci_disable_device(pdev);
+err_enable_device:
+       return err;
+}
+
+static void __devexit ioat_remove(struct pci_dev *pdev)
+{
+       struct ioat_device *device;
+       struct dma_chan *chan, *_chan;
+       struct ioat_dma_chan *ioat_chan;
+
+       device = pci_get_drvdata(pdev);
+       dma_async_device_unregister(&device->common);
+
+       free_irq(device->pdev->irq, device);
+#ifdef CONFIG_PCI_MSI
+       if (device->msi)
+               pci_disable_msi(device->pdev);
+#endif
+       pci_pool_destroy(device->dma_pool);
+       pci_pool_destroy(device->completion_pool);
+       iounmap(device->reg_base);
+       pci_release_regions(pdev);
+       pci_disable_device(pdev);
+       list_for_each_entry_safe(chan, _chan, &device->common.channels, device_node) {
+               ioat_chan = to_ioat_chan(chan);
+               list_del(&chan->device_node);
+               kfree(ioat_chan);
+       }
+       kfree(device);
+}
+
+/* MODULE API */
+MODULE_VERSION("1.7");
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Intel Corporation");
+
+static int __init ioat_init_module(void)
+{
+       /* it's currently unsafe to unload this module */
+       /* if forced, worst case is that rmmod hangs */
+       if (THIS_MODULE != NULL)
+               THIS_MODULE->unsafe = 1;
+
+       return pci_module_init(&ioat_pci_drv);
+}
+
+module_init(ioat_init_module);
+
+static void __exit ioat_exit_module(void)
+{
+       pci_unregister_driver(&ioat_pci_drv);
+}
+
+module_exit(ioat_exit_module);
diff --git a/drivers/dma/ioatdma.h b/drivers/dma/ioatdma.h
new file mode 100644 (file)
index 0000000..a5d3b36
--- /dev/null
@@ -0,0 +1,125 @@
+/*
+ * Copyright(c) 2004 - 2006 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+ *
+ * The full GNU General Public License is included in this distribution in the
+ * file called COPYING.
+ */
+#ifndef IOATDMA_H
+#define IOATDMA_H
+
+#include <linux/dmaengine.h>
+#include "ioatdma_hw.h"
+#include <linux/init.h>
+#include <linux/dmapool.h>
+#include <linux/cache.h>
+#include <linux/pci_ids.h>
+
+#define IOAT_LOW_COMPLETION_MASK       0xffffffc0
+
+extern struct list_head dma_device_list;
+extern struct list_head dma_client_list;
+
+/**
+ * struct ioat_device - internal representation of a IOAT device
+ * @pdev: PCI-Express device
+ * @reg_base: MMIO register space base address
+ * @dma_pool: for allocating DMA descriptors
+ * @common: embedded struct dma_device
+ * @msi: Message Signaled Interrupt number
+ */
+
+struct ioat_device {
+       struct pci_dev *pdev;
+       void *reg_base;
+       struct pci_pool *dma_pool;
+       struct pci_pool *completion_pool;
+
+       struct dma_device common;
+       u8 msi;
+};
+
+/**
+ * struct ioat_dma_chan - internal representation of a DMA channel
+ * @device:
+ * @reg_base:
+ * @sw_in_use:
+ * @completion:
+ * @completion_low:
+ * @completion_high:
+ * @completed_cookie: last cookie seen completed on cleanup
+ * @cookie: value of last cookie given to client
+ * @last_completion:
+ * @xfercap:
+ * @desc_lock:
+ * @free_desc:
+ * @used_desc:
+ * @resource:
+ * @device_node:
+ */
+
+struct ioat_dma_chan {
+
+       void *reg_base;
+
+       dma_cookie_t completed_cookie;
+       unsigned long last_completion;
+
+       u32 xfercap;    /* XFERCAP register value expanded out */
+
+       spinlock_t cleanup_lock;
+       spinlock_t desc_lock;
+       struct list_head free_desc;
+       struct list_head used_desc;
+
+       int pending;
+
+       struct ioat_device *device;
+       struct dma_chan common;
+
+       dma_addr_t completion_addr;
+       union {
+               u64 full; /* HW completion writeback */
+               struct {
+                       u32 low;
+                       u32 high;
+               };
+       } *completion_virt;
+};
+
+/* wrapper around hardware descriptor format + additional software fields */
+
+/**
+ * struct ioat_desc_sw - wrapper around hardware descriptor
+ * @hw: hardware DMA descriptor
+ * @node:
+ * @cookie:
+ * @phys:
+ */
+
+struct ioat_desc_sw {
+       struct ioat_dma_descriptor *hw;
+       struct list_head node;
+       dma_cookie_t cookie;
+       dma_addr_t phys;
+       DECLARE_PCI_UNMAP_ADDR(src)
+       DECLARE_PCI_UNMAP_LEN(src_len)
+       DECLARE_PCI_UNMAP_ADDR(dst)
+       DECLARE_PCI_UNMAP_LEN(dst_len)
+};
+
+#endif /* IOATDMA_H */
+
diff --git a/drivers/dma/ioatdma_hw.h b/drivers/dma/ioatdma_hw.h
new file mode 100644 (file)
index 0000000..4d7a128
--- /dev/null
@@ -0,0 +1,52 @@
+/*
+ * Copyright(c) 2004 - 2006 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+ *
+ * The full GNU General Public License is included in this distribution in the
+ * file called COPYING.
+ */
+#ifndef _IOAT_HW_H_
+#define _IOAT_HW_H_
+
+/* PCI Configuration Space Values */
+#define IOAT_PCI_VID                   0x8086
+#define IOAT_PCI_DID                   0x1A38
+#define IOAT_PCI_RID                   0x00
+#define IOAT_PCI_SVID                  0x8086
+#define IOAT_PCI_SID                   0x8086
+#define IOAT_VER                       0x12    /* Version 1.2 */
+
+struct ioat_dma_descriptor {
+       uint32_t        size;
+       uint32_t        ctl;
+       uint64_t        src_addr;
+       uint64_t        dst_addr;
+       uint64_t        next;
+       uint64_t        rsv1;
+       uint64_t        rsv2;
+       uint64_t        user1;
+       uint64_t        user2;
+};
+
+#define IOAT_DMA_DESCRIPTOR_CTL_INT_GN 0x00000001
+#define IOAT_DMA_DESCRIPTOR_CTL_SRC_SN 0x00000002
+#define IOAT_DMA_DESCRIPTOR_CTL_DST_SN 0x00000004
+#define IOAT_DMA_DESCRIPTOR_CTL_CP_STS 0x00000008
+#define IOAT_DMA_DESCRIPTOR_CTL_FRAME  0x00000010
+#define IOAT_DMA_DESCRIPTOR_NUL                0x00000020
+#define IOAT_DMA_DESCRIPTOR_OPCODE     0xFF000000
+
+#endif
diff --git a/drivers/dma/ioatdma_io.h b/drivers/dma/ioatdma_io.h
new file mode 100644 (file)
index 0000000..c0b4bf6
--- /dev/null
@@ -0,0 +1,118 @@
+/*
+ * Copyright(c) 2004 - 2006 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+ *
+ * The full GNU General Public License is included in this distribution in the
+ * file called COPYING.
+ */
+#ifndef IOATDMA_IO_H
+#define IOATDMA_IO_H
+
+#include <asm/io.h>
+
+/*
+ * device and per-channel MMIO register read and write functions
+ * this is a lot of anoying inline functions, but it's typesafe
+ */
+
+static inline u8 ioatdma_read8(struct ioat_device *device,
+                               unsigned int offset)
+{
+       return readb(device->reg_base + offset);
+}
+
+static inline u16 ioatdma_read16(struct ioat_device *device,
+                                 unsigned int offset)
+{
+       return readw(device->reg_base + offset);
+}
+
+static inline u32 ioatdma_read32(struct ioat_device *device,
+                                 unsigned int offset)
+{
+       return readl(device->reg_base + offset);
+}
+
+static inline void ioatdma_write8(struct ioat_device *device,
+                                  unsigned int offset, u8 value)
+{
+       writeb(value, device->reg_base + offset);
+}
+
+static inline void ioatdma_write16(struct ioat_device *device,
+                                   unsigned int offset, u16 value)
+{
+       writew(value, device->reg_base + offset);
+}
+
+static inline void ioatdma_write32(struct ioat_device *device,
+                                   unsigned int offset, u32 value)
+{
+       writel(value, device->reg_base + offset);
+}
+
+static inline u8 ioatdma_chan_read8(struct ioat_dma_chan *chan,
+                                    unsigned int offset)
+{
+       return readb(chan->reg_base + offset);
+}
+
+static inline u16 ioatdma_chan_read16(struct ioat_dma_chan *chan,
+                                      unsigned int offset)
+{
+       return readw(chan->reg_base + offset);
+}
+
+static inline u32 ioatdma_chan_read32(struct ioat_dma_chan *chan,
+                                      unsigned int offset)
+{
+       return readl(chan->reg_base + offset);
+}
+
+static inline void ioatdma_chan_write8(struct ioat_dma_chan *chan,
+                                       unsigned int offset, u8 value)
+{
+       writeb(value, chan->reg_base + offset);
+}
+
+static inline void ioatdma_chan_write16(struct ioat_dma_chan *chan,
+                                        unsigned int offset, u16 value)
+{
+       writew(value, chan->reg_base + offset);
+}
+
+static inline void ioatdma_chan_write32(struct ioat_dma_chan *chan,
+                                        unsigned int offset, u32 value)
+{
+       writel(value, chan->reg_base + offset);
+}
+
+#if (BITS_PER_LONG == 64)
+static inline u64 ioatdma_chan_read64(struct ioat_dma_chan *chan,
+                                      unsigned int offset)
+{
+       return readq(chan->reg_base + offset);
+}
+
+static inline void ioatdma_chan_write64(struct ioat_dma_chan *chan,
+                                        unsigned int offset, u64 value)
+{
+       writeq(value, chan->reg_base + offset);
+}
+#endif
+
+#endif /* IOATDMA_IO_H */
+
diff --git a/drivers/dma/ioatdma_registers.h b/drivers/dma/ioatdma_registers.h
new file mode 100644 (file)
index 0000000..41a21ab
--- /dev/null
@@ -0,0 +1,126 @@
+/*
+ * Copyright(c) 2004 - 2006 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+ *
+ * The full GNU General Public License is included in this distribution in the
+ * file called COPYING.
+ */
+#ifndef _IOAT_REGISTERS_H_
+#define _IOAT_REGISTERS_H_
+
+
+/* MMIO Device Registers */
+#define IOAT_CHANCNT_OFFSET                    0x00    /*  8-bit */
+
+#define IOAT_XFERCAP_OFFSET                    0x01    /*  8-bit */
+#define IOAT_XFERCAP_4KB                       12
+#define IOAT_XFERCAP_8KB                       13
+#define IOAT_XFERCAP_16KB                      14
+#define IOAT_XFERCAP_32KB                      15
+#define IOAT_XFERCAP_32GB                      0
+
+#define IOAT_GENCTRL_OFFSET                    0x02    /*  8-bit */
+#define IOAT_GENCTRL_DEBUG_EN                  0x01
+
+#define IOAT_INTRCTRL_OFFSET                   0x03    /*  8-bit */
+#define IOAT_INTRCTRL_MASTER_INT_EN            0x01    /* Master Interrupt Enable */
+#define IOAT_INTRCTRL_INT_STATUS               0x02    /* ATTNSTATUS -or- Channel Int */
+#define IOAT_INTRCTRL_INT                      0x04    /* INT_STATUS -and- MASTER_INT_EN */
+
+#define IOAT_ATTNSTATUS_OFFSET                 0x04    /* Each bit is a channel */
+
+#define IOAT_VER_OFFSET                                0x08    /*  8-bit */
+#define IOAT_VER_MAJOR_MASK                    0xF0
+#define IOAT_VER_MINOR_MASK                    0x0F
+#define GET_IOAT_VER_MAJOR(x)                  ((x) & IOAT_VER_MAJOR_MASK)
+#define GET_IOAT_VER_MINOR(x)                  ((x) & IOAT_VER_MINOR_MASK)
+
+#define IOAT_PERPORTOFFSET_OFFSET              0x0A    /* 16-bit */
+
+#define IOAT_INTRDELAY_OFFSET                  0x0C    /* 16-bit */
+#define IOAT_INTRDELAY_INT_DELAY_MASK          0x3FFF  /* Interrupt Delay Time */
+#define IOAT_INTRDELAY_COALESE_SUPPORT         0x8000  /* Interrupt Coalesing Supported */
+
+#define IOAT_DEVICE_STATUS_OFFSET              0x0E    /* 16-bit */
+#define IOAT_DEVICE_STATUS_DEGRADED_MODE       0x0001
+
+
+#define IOAT_CHANNEL_MMIO_SIZE                 0x80    /* Each Channel MMIO space is this size */
+
+/* DMA Channel Registers */
+#define IOAT_CHANCTRL_OFFSET                   0x00    /* 16-bit Channel Control Register */
+#define IOAT_CHANCTRL_CHANNEL_PRIORITY_MASK    0xF000
+#define IOAT_CHANCTRL_CHANNEL_IN_USE           0x0100
+#define IOAT_CHANCTRL_DESCRIPTOR_ADDR_SNOOP_CONTROL    0x0020
+#define IOAT_CHANCTRL_ERR_INT_EN               0x0010
+#define IOAT_CHANCTRL_ANY_ERR_ABORT_EN         0x0008
+#define IOAT_CHANCTRL_ERR_COMPLETION_EN                0x0004
+#define IOAT_CHANCTRL_INT_DISABLE              0x0001
+
+#define IOAT_DMA_COMP_OFFSET                   0x02    /* 16-bit DMA channel compatability */
+#define IOAT_DMA_COMP_V1                       0x0001  /* Compatability with DMA version 1 */
+
+#define IOAT_CHANSTS_OFFSET                    0x04    /* 64-bit Channel Status Register */
+#define IOAT_CHANSTS_OFFSET_LOW                        0x04
+#define IOAT_CHANSTS_OFFSET_HIGH               0x08
+#define IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR 0xFFFFFFFFFFFFFFC0
+#define IOAT_CHANSTS_SOFT_ERR                  0x0000000000000010
+#define IOAT_CHANSTS_DMA_TRANSFER_STATUS       0x0000000000000007
+#define IOAT_CHANSTS_DMA_TRANSFER_STATUS_ACTIVE        0x0
+#define IOAT_CHANSTS_DMA_TRANSFER_STATUS_DONE  0x1
+#define IOAT_CHANSTS_DMA_TRANSFER_STATUS_SUSPENDED     0x2
+#define IOAT_CHANSTS_DMA_TRANSFER_STATUS_HALTED        0x3
+
+#define IOAT_CHAINADDR_OFFSET                  0x0C    /* 64-bit Descriptor Chain Address Register */
+#define IOAT_CHAINADDR_OFFSET_LOW              0x0C
+#define IOAT_CHAINADDR_OFFSET_HIGH             0x10
+
+#define IOAT_CHANCMD_OFFSET                    0x14    /*  8-bit DMA Channel Command Register */
+#define IOAT_CHANCMD_RESET                     0x20
+#define IOAT_CHANCMD_RESUME                    0x10
+#define IOAT_CHANCMD_ABORT                     0x08
+#define IOAT_CHANCMD_SUSPEND                   0x04
+#define IOAT_CHANCMD_APPEND                    0x02
+#define IOAT_CHANCMD_START                     0x01
+
+#define IOAT_CHANCMP_OFFSET                    0x18    /* 64-bit Channel Completion Address Register */
+#define IOAT_CHANCMP_OFFSET_LOW                        0x18
+#define IOAT_CHANCMP_OFFSET_HIGH               0x1C
+
+#define IOAT_CDAR_OFFSET                       0x20    /* 64-bit Current Descriptor Address Register */
+#define IOAT_CDAR_OFFSET_LOW                   0x20
+#define IOAT_CDAR_OFFSET_HIGH                  0x24
+
+#define IOAT_CHANERR_OFFSET                    0x28    /* 32-bit Channel Error Register */
+#define IOAT_CHANERR_DMA_TRANSFER_SRC_ADDR_ERR 0x0001
+#define IOAT_CHANERR_DMA_TRANSFER_DEST_ADDR_ERR        0x0002
+#define IOAT_CHANERR_NEXT_DESCRIPTOR_ADDR_ERR  0x0004
+#define IOAT_CHANERR_NEXT_DESCRIPTOR_ALIGNMENT_ERR     0x0008
+#define IOAT_CHANERR_CHAIN_ADDR_VALUE_ERR      0x0010
+#define IOAT_CHANERR_CHANCMD_ERR               0x0020
+#define IOAT_CHANERR_CHIPSET_UNCORRECTABLE_DATA_INTEGRITY_ERR  0x0040
+#define IOAT_CHANERR_DMA_UNCORRECTABLE_DATA_INTEGRITY_ERR      0x0080
+#define IOAT_CHANERR_READ_DATA_ERR             0x0100
+#define IOAT_CHANERR_WRITE_DATA_ERR            0x0200
+#define IOAT_CHANERR_DESCRIPTOR_CONTROL_ERR    0x0400
+#define IOAT_CHANERR_DESCRIPTOR_LENGTH_ERR     0x0800
+#define IOAT_CHANERR_COMPLETION_ADDR_ERR       0x1000
+#define IOAT_CHANERR_INT_CONFIGURATION_ERR     0x2000
+#define IOAT_CHANERR_SOFT_ERR                  0x4000
+
+#define IOAT_CHANERR_MASK_OFFSET               0x2C    /* 32-bit Channel Error Register */
+
+#endif /* _IOAT_REGISTERS_H_ */
diff --git a/drivers/dma/iovlock.c b/drivers/dma/iovlock.c
new file mode 100644 (file)
index 0000000..5ed327e
--- /dev/null
@@ -0,0 +1,301 @@
+/*
+ * Copyright(c) 2004 - 2006 Intel Corporation. All rights reserved.
+ * Portions based on net/core/datagram.c and copyrighted by their authors.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+ *
+ * The full GNU General Public License is included in this distribution in the
+ * file called COPYING.
+ */
+
+/*
+ * This code allows the net stack to make use of a DMA engine for
+ * skb to iovec copies.
+ */
+
+#include <linux/dmaengine.h>
+#include <linux/pagemap.h>
+#include <net/tcp.h> /* for memcpy_toiovec */
+#include <asm/io.h>
+#include <asm/uaccess.h>
+
+int num_pages_spanned(struct iovec *iov)
+{
+       return
+       ((PAGE_ALIGN((unsigned long)iov->iov_base + iov->iov_len) -
+       ((unsigned long)iov->iov_base & PAGE_MASK)) >> PAGE_SHIFT);
+}
+
+/*
+ * Pin down all the iovec pages needed for len bytes.
+ * Return a struct dma_pinned_list to keep track of pages pinned down.
+ *
+ * We are allocating a single chunk of memory, and then carving it up into
+ * 3 sections, the latter 2 whose size depends on the number of iovecs and the
+ * total number of pages, respectively.
+ */
+struct dma_pinned_list *dma_pin_iovec_pages(struct iovec *iov, size_t len)
+{
+       struct dma_pinned_list *local_list;
+       struct page **pages;
+       int i;
+       int ret;
+       int nr_iovecs = 0;
+       int iovec_len_used = 0;
+       int iovec_pages_used = 0;
+       long err;
+
+       /* don't pin down non-user-based iovecs */
+       if (segment_eq(get_fs(), KERNEL_DS))
+               return NULL;
+
+       /* determine how many iovecs/pages there are, up front */
+       do {
+               iovec_len_used += iov[nr_iovecs].iov_len;
+               iovec_pages_used += num_pages_spanned(&iov[nr_iovecs]);
+               nr_iovecs++;
+       } while (iovec_len_used < len);
+
+       /* single kmalloc for pinned list, page_list[], and the page arrays */
+       local_list = kmalloc(sizeof(*local_list)
+               + (nr_iovecs * sizeof (struct dma_page_list))
+               + (iovec_pages_used * sizeof (struct page*)), GFP_KERNEL);
+       if (!local_list) {
+               err = -ENOMEM;
+               goto out;
+       }
+
+       /* list of pages starts right after the page list array */
+       pages = (struct page **) &local_list->page_list[nr_iovecs];
+
+       for (i = 0; i < nr_iovecs; i++) {
+               struct dma_page_list *page_list = &local_list->page_list[i];
+
+               len -= iov[i].iov_len;
+
+               if (!access_ok(VERIFY_WRITE, iov[i].iov_base, iov[i].iov_len)) {
+                       err = -EFAULT;
+                       goto unpin;
+               }
+
+               page_list->nr_pages = num_pages_spanned(&iov[i]);
+               page_list->base_address = iov[i].iov_base;
+
+               page_list->pages = pages;
+               pages += page_list->nr_pages;
+
+               /* pin pages down */
+               down_read(&current->mm->mmap_sem);
+               ret = get_user_pages(
+                       current,
+                       current->mm,
+                       (unsigned long) iov[i].iov_base,
+                       page_list->nr_pages,
+                       1,      /* write */
+                       0,      /* force */
+                       page_list->pages,
+                       NULL);
+               up_read(&current->mm->mmap_sem);
+
+               if (ret != page_list->nr_pages) {
+                       err = -ENOMEM;
+                       goto unpin;
+               }
+
+               local_list->nr_iovecs = i + 1;
+       }
+
+       return local_list;
+
+unpin:
+       dma_unpin_iovec_pages(local_list);
+out:
+       return ERR_PTR(err);
+}
+
+void dma_unpin_iovec_pages(struct dma_pinned_list *pinned_list)
+{
+       int i, j;
+
+       if (!pinned_list)
+               return;
+
+       for (i = 0; i < pinned_list->nr_iovecs; i++) {
+               struct dma_page_list *page_list = &pinned_list->page_list[i];
+               for (j = 0; j < page_list->nr_pages; j++) {
+                       set_page_dirty_lock(page_list->pages[j]);
+                       page_cache_release(page_list->pages[j]);
+               }
+       }
+
+       kfree(pinned_list);
+}
+
+static dma_cookie_t dma_memcpy_to_kernel_iovec(struct dma_chan *chan, struct
+       iovec *iov, unsigned char *kdata, size_t len)
+{
+       dma_cookie_t dma_cookie = 0;
+
+       while (len > 0) {
+               if (iov->iov_len) {
+                       int copy = min_t(unsigned int, iov->iov_len, len);
+                       dma_cookie = dma_async_memcpy_buf_to_buf(
+                                       chan,
+                                       iov->iov_base,
+                                       kdata,
+                                       copy);
+                       kdata += copy;
+                       len -= copy;
+                       iov->iov_len -= copy;
+                       iov->iov_base += copy;
+               }
+               iov++;
+       }
+
+       return dma_cookie;
+}
+
+/*
+ * We have already pinned down the pages we will be using in the iovecs.
+ * Each entry in iov array has corresponding entry in pinned_list->page_list.
+ * Using array indexing to keep iov[] and page_list[] in sync.
+ * Initial elements in iov array's iov->iov_len will be 0 if already copied into
+ *   by another call.
+ * iov array length remaining guaranteed to be bigger than len.
+ */
+dma_cookie_t dma_memcpy_to_iovec(struct dma_chan *chan, struct iovec *iov,
+       struct dma_pinned_list *pinned_list, unsigned char *kdata, size_t len)
+{
+       int iov_byte_offset;
+       int copy;
+       dma_cookie_t dma_cookie = 0;
+       int iovec_idx;
+       int page_idx;
+
+       if (!chan)
+               return memcpy_toiovec(iov, kdata, len);
+
+       /* -> kernel copies (e.g. smbfs) */
+       if (!pinned_list)
+               return dma_memcpy_to_kernel_iovec(chan, iov, kdata, len);
+
+       iovec_idx = 0;
+       while (iovec_idx < pinned_list->nr_iovecs) {
+               struct dma_page_list *page_list;
+
+               /* skip already used-up iovecs */
+               while (!iov[iovec_idx].iov_len)
+                       iovec_idx++;
+
+               page_list = &pinned_list->page_list[iovec_idx];
+
+               iov_byte_offset = ((unsigned long)iov[iovec_idx].iov_base & ~PAGE_MASK);
+               page_idx = (((unsigned long)iov[iovec_idx].iov_base & PAGE_MASK)
+                        - ((unsigned long)page_list->base_address & PAGE_MASK)) >> PAGE_SHIFT;
+
+               /* break up copies to not cross page boundary */
+               while (iov[iovec_idx].iov_len) {
+                       copy = min_t(int, PAGE_SIZE - iov_byte_offset, len);
+                       copy = min_t(int, copy, iov[iovec_idx].iov_len);
+
+                       dma_cookie = dma_async_memcpy_buf_to_pg(chan,
+                                       page_list->pages[page_idx],
+                                       iov_byte_offset,
+                                       kdata,
+                                       copy);
+
+                       len -= copy;
+                       iov[iovec_idx].iov_len -= copy;
+                       iov[iovec_idx].iov_base += copy;
+
+                       if (!len)
+                               return dma_cookie;
+
+                       kdata += copy;
+                       iov_byte_offset = 0;
+                       page_idx++;
+               }
+               iovec_idx++;
+       }
+
+       /* really bad if we ever run out of iovecs */
+       BUG();
+       return -EFAULT;
+}
+
+dma_cookie_t dma_memcpy_pg_to_iovec(struct dma_chan *chan, struct iovec *iov,
+       struct dma_pinned_list *pinned_list, struct page *page,
+       unsigned int offset, size_t len)
+{
+       int iov_byte_offset;
+       int copy;
+       dma_cookie_t dma_cookie = 0;
+       int iovec_idx;
+       int page_idx;
+       int err;
+
+       /* this needs as-yet-unimplemented buf-to-buff, so punt. */
+       /* TODO: use dma for this */
+       if (!chan || !pinned_list) {
+               u8 *vaddr = kmap(page);
+               err = memcpy_toiovec(iov, vaddr + offset, len);
+               kunmap(page);
+               return err;
+       }
+
+       iovec_idx = 0;
+       while (iovec_idx < pinned_list->nr_iovecs) {
+               struct dma_page_list *page_list;
+
+               /* skip already used-up iovecs */
+               while (!iov[iovec_idx].iov_len)
+                       iovec_idx++;
+
+               page_list = &pinned_list->page_list[iovec_idx];
+
+               iov_byte_offset = ((unsigned long)iov[iovec_idx].iov_base & ~PAGE_MASK);
+               page_idx = (((unsigned long)iov[iovec_idx].iov_base & PAGE_MASK)
+                        - ((unsigned long)page_list->base_address & PAGE_MASK)) >> PAGE_SHIFT;
+
+               /* break up copies to not cross page boundary */
+               while (iov[iovec_idx].iov_len) {
+                       copy = min_t(int, PAGE_SIZE - iov_byte_offset, len);
+                       copy = min_t(int, copy, iov[iovec_idx].iov_len);
+
+                       dma_cookie = dma_async_memcpy_pg_to_pg(chan,
+                                       page_list->pages[page_idx],
+                                       iov_byte_offset,
+                                       page,
+                                       offset,
+                                       copy);
+
+                       len -= copy;
+                       iov[iovec_idx].iov_len -= copy;
+                       iov[iovec_idx].iov_base += copy;
+
+                       if (!len)
+                               return dma_cookie;
+
+                       offset += copy;
+                       iov_byte_offset = 0;
+                       page_idx++;
+               }
+               iovec_idx++;
+       }
+
+       /* really bad if we ever run out of iovecs */
+       BUG();
+       return -EFAULT;
+}
index ec41c8f46c7278f00818eddd30ea7c69b1a00bd1..216471fa01cc48fed536e16acf817d9b88383886 100644 (file)
@@ -820,7 +820,8 @@ void ipoib_mcast_restart_task(void *dev_ptr)
 
        ipoib_mcast_stop_thread(dev, 0);
 
-       spin_lock_irqsave(&dev->xmit_lock, flags);
+       local_irq_save(flags);
+       netif_tx_lock(dev);
        spin_lock(&priv->lock);
 
        /*
@@ -895,7 +896,8 @@ void ipoib_mcast_restart_task(void *dev_ptr)
        }
 
        spin_unlock(&priv->lock);
-       spin_unlock_irqrestore(&dev->xmit_lock, flags);
+       netif_tx_unlock(dev);
+       local_irq_restore(flags);
 
        /* We have to cancel outside of the spinlock */
        list_for_each_entry_safe(mcast, tmcast, &remove_list, list) {
index 2f0f35811bf7710c2a95e63bbeaa37c2900be0af..9fd87521a1639bd3dae51dcdce48545614d41a85 100644 (file)
@@ -1052,7 +1052,7 @@ static void wq_set_multicast_list (void *data)
 
        dvb_net_feed_stop(dev);
        priv->rx_mode = RX_MODE_UNI;
-       spin_lock_bh(&dev->xmit_lock);
+       netif_tx_lock_bh(dev);
 
        if (dev->flags & IFF_PROMISC) {
                dprintk("%s: promiscuous mode\n", dev->name);
@@ -1077,7 +1077,7 @@ static void wq_set_multicast_list (void *data)
                }
        }
 
-       spin_unlock_bh(&dev->xmit_lock);
+       netif_tx_unlock_bh(dev);
        dvb_net_feed_start(dev);
 }
 
index f6d51ce34b005c99e90eb8e1fd8a824a055cd6ed..bb44509fd404b66791793c8bf3da6ea8a8fa8b3a 100644 (file)
@@ -909,7 +909,7 @@ MODULE_PARM_DESC(irq, "EtherLink IRQ number");
  * here also causes the module to be unloaded
  */
  
-int init_module(void)
+int __init init_module(void)
 {
        dev_3c501 = el1_probe(-1);
        if (IS_ERR(dev_3c501))
index dcc98afa65d717c7abe1f7360c0c9a90c75c3ce7..cb5ef75450dc72c19b98a77c5c5f8ce9c39fc5c8 100644 (file)
@@ -688,7 +688,7 @@ MODULE_LICENSE("GPL");
 
 /* This is set up so that only a single autoprobe takes place per call.
 ISA device autoprobes on a running machine are not recommended. */
-int
+int __init
 init_module(void)
 {
        struct net_device *dev;
index 111601ca4ca3da97d054cf3a70dfad08da476735..19c0b856c488097494f69c27ff14e2c3f615deeb 100644 (file)
@@ -1633,7 +1633,7 @@ MODULE_PARM_DESC(io, "EtherLink Plus I/O base address(es)");
 MODULE_PARM_DESC(irq, "EtherLink Plus IRQ number(s) (assigned)");
 MODULE_PARM_DESC(dma, "EtherLink Plus DMA channel(s)");
 
-int init_module(void)
+int __init init_module(void)
 {
        int this_dev, found = 0;
 
index 4db82893909c3118c17d0ee9435b72cf8666c112..6039049259ed1c1e94e56d787fbfb4dd7b57bceb 100644 (file)
@@ -932,7 +932,7 @@ module_param(irq, int, 0);
 MODULE_PARM_DESC(io, "EtherLink16 I/O base address");
 MODULE_PARM_DESC(irq, "(ignored)");
 
-int init_module(void)
+int __init init_module(void)
 {
        if (io == 0)
                printk("3c507: You should not use auto-probing with insmod!\n");
index b40885d4168092baa1d3e6e691e1aea0b2428227..4bf8510655c52574a543b34bd6204a40ea47f7f5 100644 (file)
@@ -1277,7 +1277,7 @@ MODULE_PARM_DESC(io, "EtherLink/MC I/O base address(es)");
 MODULE_PARM_DESC(irq, "EtherLink/MC IRQ number(s)");
 MODULE_LICENSE("GPL");
 
-int init_module(void)
+int __init init_module(void)
 {
        int this_dev,found = 0;
 
index 6db3301e7965adfbc53281405ac6220c0204a696..1b1cb0026072e71d200d6a7e8fd01dadf9582661 100644 (file)
@@ -1646,7 +1646,7 @@ static struct net_device *this_device;
  *     insmod multiple modules for now but it's a hack.
  */
 
-int init_module(void)
+int __init init_module(void)
 {
        this_device = mc32_probe(-1);
        if (IS_ERR(this_device))
index 066e22b01a941ed90fcc3edf028ed217b8688da0..46d8c01437e929080703eaf88e617cde75f0106b 100644 (file)
        See the file COPYING in this distribution for more information.
 
        Contributors:
-       
+
                Wake-on-LAN support - Felipe Damasio <felipewd@terra.com.br>
                PCI suspend/resume  - Felipe Damasio <felipewd@terra.com.br>
                LinkChg interrupt   - Felipe Damasio <felipewd@terra.com.br>
-                       
+
        TODO:
        * Test Tx checksumming thoroughly
        * Implement dev->tx_timeout
@@ -461,7 +461,7 @@ static void cp_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid)
 static inline void cp_set_rxbufsize (struct cp_private *cp)
 {
        unsigned int mtu = cp->dev->mtu;
-       
+
        if (mtu > ETH_DATA_LEN)
                /* MTU + ethernet header + FCS + optional VLAN tag */
                cp->rx_buf_sz = mtu + ETH_HLEN + 8;
@@ -510,7 +510,7 @@ static void cp_rx_err_acct (struct cp_private *cp, unsigned rx_tail,
 static inline unsigned int cp_rx_csum_ok (u32 status)
 {
        unsigned int protocol = (status >> 16) & 0x3;
-       
+
        if (likely((protocol == RxProtoTCP) && (!(status & TCPFail))))
                return 1;
        else if ((protocol == RxProtoUDP) && (!(status & UDPFail)))
@@ -1061,7 +1061,7 @@ static void cp_init_hw (struct cp_private *cp)
        cpw8(Config3, PARMEnable);
        cp->wol_enabled = 0;
 
-       cpw8(Config5, cpr8(Config5) & PMEStatus); 
+       cpw8(Config5, cpr8(Config5) & PMEStatus);
 
        cpw32_f(HiTxRingAddr, 0);
        cpw32_f(HiTxRingAddr + 4, 0);
@@ -1351,7 +1351,7 @@ static void netdev_get_wol (struct cp_private *cp,
                         WAKE_MCAST | WAKE_UCAST;
        /* We don't need to go on if WOL is disabled */
        if (!cp->wol_enabled) return;
-       
+
        options        = cpr8 (Config3);
        if (options & LinkUp)        wol->wolopts |= WAKE_PHY;
        if (options & MagicPacket)   wol->wolopts |= WAKE_MAGIC;
@@ -1919,7 +1919,7 @@ static int cp_resume (struct pci_dev *pdev)
        mii_check_media(&cp->mii_if, netif_msg_link(cp), FALSE);
 
        spin_unlock_irqrestore (&cp->lock, flags);
-       
+
        return 0;
 }
 #endif /* CONFIG_PM */
index feae7832fc846b3daef50d83d5cf0184561b9484..abd6261465f16e2692c45033ab81e3263dc6716e 100644 (file)
@@ -165,7 +165,7 @@ static int multicast_filter_limit = 32;
 static int debug = -1;
 
 /*
- * Receive ring size 
+ * Receive ring size
  * Warning: 64K ring has hardware issues and may lock up.
  */
 #if defined(CONFIG_SH_DREAMCAST)
@@ -257,7 +257,7 @@ static struct pci_device_id rtl8139_pci_tbl[] = {
        {0x018a, 0x0106, PCI_ANY_ID, PCI_ANY_ID, 0, 0, RTL8139 },
        {0x126c, 0x1211, PCI_ANY_ID, PCI_ANY_ID, 0, 0, RTL8139 },
        {0x1743, 0x8139, PCI_ANY_ID, PCI_ANY_ID, 0, 0, RTL8139 },
-       {0x021b, 0x8139, PCI_ANY_ID, PCI_ANY_ID, 0, 0, RTL8139 }, 
+       {0x021b, 0x8139, PCI_ANY_ID, PCI_ANY_ID, 0, 0, RTL8139 },
 
 #ifdef CONFIG_SH_SECUREEDGE5410
        /* Bogus 8139 silicon reports 8129 without external PROM :-( */
@@ -1824,7 +1824,7 @@ static void rtl8139_rx_err (u32 rx_status, struct net_device *dev,
        int tmp_work;
 #endif
 
-       if (netif_msg_rx_err (tp)) 
+       if (netif_msg_rx_err (tp))
                printk(KERN_DEBUG "%s: Ethernet frame had errors, status %8.8x.\n",
                        dev->name, rx_status);
        tp->stats.rx_errors++;
@@ -1944,7 +1944,7 @@ static int rtl8139_rx(struct net_device *dev, struct rtl8139_private *tp,
                 RTL_R16 (RxBufAddr),
                 RTL_R16 (RxBufPtr), RTL_R8 (ChipCmd));
 
-       while (netif_running(dev) && received < budget 
+       while (netif_running(dev) && received < budget
               && (RTL_R8 (ChipCmd) & RxBufEmpty) == 0) {
                u32 ring_offset = cur_rx % RX_BUF_LEN;
                u32 rx_status;
@@ -2031,7 +2031,7 @@ no_early_rx:
 
                        netif_receive_skb (skb);
                } else {
-                       if (net_ratelimit()) 
+                       if (net_ratelimit())
                                printk (KERN_WARNING
                                        "%s: Memory squeeze, dropping packet.\n",
                                        dev->name);
@@ -2158,13 +2158,13 @@ static irqreturn_t rtl8139_interrupt (int irq, void *dev_instance,
        status = RTL_R16 (IntrStatus);
 
        /* shared irq? */
-       if (unlikely((status & rtl8139_intr_mask) == 0)) 
+       if (unlikely((status & rtl8139_intr_mask) == 0))
                goto out;
 
        handled = 1;
 
        /* h/w no longer present (hotplug?) or major error, bail */
-       if (unlikely(status == 0xFFFF)) 
+       if (unlikely(status == 0xFFFF))
                goto out;
 
        /* close possible race's with dev_close */
index bdaaad8f2123d5410c1985ceb526e200bb1958e6..0c6b45a11d15f876819d395314b5e8fd5f56078b 100644 (file)
@@ -447,6 +447,7 @@ config MIPS_GT96100ETH
 config MIPS_AU1X00_ENET
        bool "MIPS AU1000 Ethernet support"
        depends on NET_ETHERNET && SOC_AU1X00
+       select PHYLIB
        select CRC32
        help
          If you have an Alchemy Semi AU1X00 based system
@@ -865,6 +866,22 @@ config DM9000
          <file:Documentation/networking/net-modules.txt>.  The module will be
          called dm9000.
 
+config SMC911X
+       tristate "SMSC LAN911[5678] support"
+       select CRC32
+       select MII
+       depends on NET_ETHERNET && ARCH_PXA
+       help
+         This is a driver for SMSC's LAN911x series of Ethernet chipsets
+         including the new LAN9115, LAN9116, LAN9117, and LAN9118.
+         Say Y if you want it compiled into the kernel, 
+         and read the Ethernet-HOWTO, available from
+         <http://www.linuxdoc.org/docs.html#howto>.
+
+         This driver is also available as a module. The module will be 
+         called smc911x.  If you want to compile it as a module, say M 
+         here and read <file:Documentation/modules.txt>
+
 config NET_VENDOR_RACAL
        bool "Racal-Interlan (Micom) NI cards"
        depends on NET_ETHERNET && ISA
@@ -2163,6 +2180,8 @@ config TIGON3
 config BNX2
        tristate "Broadcom NetXtremeII support"
        depends on PCI
+       select CRC32
+       select ZLIB_INFLATE
        help
          This driver supports Broadcom NetXtremeII gigabit Ethernet cards.
 
@@ -2311,6 +2330,23 @@ config S2IO_NAPI
 
          If in doubt, say N.
 
+config MYRI10GE
+       tristate "Myricom Myri-10G Ethernet support"
+       depends on PCI
+       select FW_LOADER
+       select CRC32
+       ---help---
+         This driver supports Myricom Myri-10G Dual Protocol interface in
+         Ethernet mode. If the eeprom on your board is not recent enough,
+         you will need a newer firmware image.
+         You may get this image or more information, at:
+
+         <http://www.myri.com/Myri-10G/>
+
+         To compile this driver as a module, choose M here and read
+         <file:Documentation/networking/net-modules.txt>.  The module
+         will be called myri10ge.
+
 endmenu
 
 source "drivers/net/tokenring/Kconfig"
index b90468aea077aee9a2b5ee95f10acd7869fd72aa..1eced3287507e94462b820408a3a2a8b0c90567f 100644 (file)
@@ -192,7 +192,9 @@ obj-$(CONFIG_R8169) += r8169.o
 obj-$(CONFIG_AMD8111_ETH) += amd8111e.o
 obj-$(CONFIG_IBMVETH) += ibmveth.o
 obj-$(CONFIG_S2IO) += s2io.o
+obj-$(CONFIG_MYRI10GE) += myri10ge/
 obj-$(CONFIG_SMC91X) += smc91x.o
+obj-$(CONFIG_SMC911X) += smc911x.o
 obj-$(CONFIG_DM9000) += dm9000.o
 obj-$(CONFIG_FEC_8XX) += fec_8xx/
 
index 14dbad14afb64da8be9d94f57d74be1579d2f1d7..038d5fcb15e67439bc23ce782eaf3b4beabe1751 100644 (file)
@@ -2,13 +2,16 @@
  *
  * Alchemy Au1x00 ethernet driver
  *
- * Copyright 2001,2002,2003 MontaVista Software Inc.
+ * Copyright 2001-2003, 2006 MontaVista Software Inc.
  * Copyright 2002 TimeSys Corp.
  * Added ethtool/mii-tool support,
  * Copyright 2004 Matt Porter <mporter@kernel.crashing.org>
  * Update: 2004 Bjoern Riemer, riemer@fokus.fraunhofer.de 
  * or riemer@riemer-nt.de: fixed the link beat detection with 
  * ioctls (SIOCGMIIPHY)
+ * Copyright 2006 Herbert Valerio Riedel <hvr@gnu.org>
+ *  converted to use linux-2.6.x's PHY framework
+ *
  * Author: MontaVista Software, Inc.
  *             ppopov@mvista.com or source@mvista.com
  *
@@ -53,6 +56,7 @@
 #include <linux/skbuff.h>
 #include <linux/delay.h>
 #include <linux/crc32.h>
+#include <linux/phy.h>
 #include <asm/mipsregs.h>
 #include <asm/irq.h>
 #include <asm/io.h>
@@ -68,7 +72,7 @@ static int au1000_debug = 5;
 static int au1000_debug = 3;
 #endif
 
-#define DRV_NAME       "au1000eth"
+#define DRV_NAME       "au1000_eth"
 #define DRV_VERSION    "1.5"
 #define DRV_AUTHOR     "Pete Popov <ppopov@embeddedalley.com>"
 #define DRV_DESC       "Au1xxx on-chip Ethernet driver"
@@ -80,7 +84,7 @@ MODULE_LICENSE("GPL");
 // prototypes
 static void hard_stop(struct net_device *);
 static void enable_rx_tx(struct net_device *dev);
-static struct net_device * au1000_probe(u32 ioaddr, int irq, int port_num);
+static struct net_device * au1000_probe(int port_num);
 static int au1000_init(struct net_device *);
 static int au1000_open(struct net_device *);
 static int au1000_close(struct net_device *);
@@ -88,17 +92,15 @@ static int au1000_tx(struct sk_buff *, struct net_device *);
 static int au1000_rx(struct net_device *);
 static irqreturn_t au1000_interrupt(int, void *, struct pt_regs *);
 static void au1000_tx_timeout(struct net_device *);
-static int au1000_set_config(struct net_device *dev, struct ifmap *map);
 static void set_rx_mode(struct net_device *);
 static struct net_device_stats *au1000_get_stats(struct net_device *);
-static void au1000_timer(unsigned long);
 static int au1000_ioctl(struct net_device *, struct ifreq *, int);
 static int mdio_read(struct net_device *, int, int);
 static void mdio_write(struct net_device *, int, int, u16);
-static void dump_mii(struct net_device *dev, int phy_id);
+static void au1000_adjust_link(struct net_device *);
+static void enable_mac(struct net_device *, int);
 
 // externs
-extern  void ack_rise_edge_irq(unsigned int);
 extern int get_ethernet_addr(char *ethernet_addr);
 extern void str2eaddr(unsigned char *ea, unsigned char *str);
 extern char * __init prom_getcmdline(void);
@@ -126,705 +128,83 @@ static unsigned char au1000_mac_addr[6] __devinitdata = {
        0x00, 0x50, 0xc2, 0x0c, 0x30, 0x00
 };
 
-#define nibswap(x) ((((x) >> 4) & 0x0f) | (((x) << 4) & 0xf0))
-#define RUN_AT(x) (jiffies + (x))
-
-// For reading/writing 32-bit words from/to DMA memory
-#define cpu_to_dma32 cpu_to_be32
-#define dma32_to_cpu be32_to_cpu
-
 struct au1000_private *au_macs[NUM_ETH_INTERFACES];
 
-/* FIXME 
- * All of the PHY code really should be detached from the MAC 
- * code.
+/*
+ * board-specific configurations
+ *
+ * PHY detection algorithm
+ *
+ * If AU1XXX_PHY_STATIC_CONFIG is undefined, the PHY setup is
+ * autodetected:
+ *
+ * mii_probe() first searches the current MAC's MII bus for a PHY,
+ * selecting the first (or last, if AU1XXX_PHY_SEARCH_HIGHEST_ADDR is
+ * defined) PHY address not already claimed by another netdev.
+ *
+ * If nothing was found that way when searching for the 2nd ethernet
+ * controller's PHY and AU1XXX_PHY1_SEARCH_ON_MAC0 is defined, then
+ * the first MII bus is searched as well for an unclaimed PHY; this is
+ * needed in case of a dual-PHY accessible only through the MAC0's MII
+ * bus.
+ *
+ * Finally, if no PHY is found, then the corresponding ethernet
+ * controller is not registered to the network subsystem.
  */
 
-/* Default advertise */
-#define GENMII_DEFAULT_ADVERTISE \
-       ADVERTISED_10baseT_Half | ADVERTISED_10baseT_Full | \
-       ADVERTISED_100baseT_Half | ADVERTISED_100baseT_Full | \
-       ADVERTISED_Autoneg
-
-#define GENMII_DEFAULT_FEATURES \
-       SUPPORTED_10baseT_Half | SUPPORTED_10baseT_Full | \
-       SUPPORTED_100baseT_Half | SUPPORTED_100baseT_Full | \
-       SUPPORTED_Autoneg
-
-int bcm_5201_init(struct net_device *dev, int phy_addr)
-{
-       s16 data;
-       
-       /* Stop auto-negotiation */
-       data = mdio_read(dev, phy_addr, MII_CONTROL);
-       mdio_write(dev, phy_addr, MII_CONTROL, data & ~MII_CNTL_AUTO);
-
-       /* Set advertisement to 10/100 and Half/Full duplex
-        * (full capabilities) */
-       data = mdio_read(dev, phy_addr, MII_ANADV);
-       data |= MII_NWAY_TX | MII_NWAY_TX_FDX | MII_NWAY_T_FDX | MII_NWAY_T;
-       mdio_write(dev, phy_addr, MII_ANADV, data);
-       
-       /* Restart auto-negotiation */
-       data = mdio_read(dev, phy_addr, MII_CONTROL);
-       data |= MII_CNTL_RST_AUTO | MII_CNTL_AUTO;
-       mdio_write(dev, phy_addr, MII_CONTROL, data);
-
-       if (au1000_debug > 4) 
-               dump_mii(dev, phy_addr);
-       return 0;
-}
-
-int bcm_5201_reset(struct net_device *dev, int phy_addr)
-{
-       s16 mii_control, timeout;
-       
-       mii_control = mdio_read(dev, phy_addr, MII_CONTROL);
-       mdio_write(dev, phy_addr, MII_CONTROL, mii_control | MII_CNTL_RESET);
-       mdelay(1);
-       for (timeout = 100; timeout > 0; --timeout) {
-               mii_control = mdio_read(dev, phy_addr, MII_CONTROL);
-               if ((mii_control & MII_CNTL_RESET) == 0)
-                       break;
-               mdelay(1);
-       }
-       if (mii_control & MII_CNTL_RESET) {
-               printk(KERN_ERR "%s PHY reset timeout !\n", dev->name);
-               return -1;
-       }
-       return 0;
-}
-
-int 
-bcm_5201_status(struct net_device *dev, int phy_addr, u16 *link, u16 *speed)
-{
-       u16 mii_data;
-       struct au1000_private *aup;
-
-       if (!dev) {
-               printk(KERN_ERR "bcm_5201_status error: NULL dev\n");
-               return -1;
-       }
-       aup = (struct au1000_private *) dev->priv;
-
-       mii_data = mdio_read(dev, aup->phy_addr, MII_STATUS);
-       if (mii_data & MII_STAT_LINK) {
-               *link = 1;
-               mii_data = mdio_read(dev, aup->phy_addr, MII_AUX_CNTRL);
-               if (mii_data & MII_AUX_100) {
-                       if (mii_data & MII_AUX_FDX) {
-                               *speed = IF_PORT_100BASEFX;
-                               dev->if_port = IF_PORT_100BASEFX;
-                       }
-                       else {
-                               *speed = IF_PORT_100BASETX;
-                               dev->if_port = IF_PORT_100BASETX;
-                       }
-               }
-               else  {
-                       *speed = IF_PORT_10BASET;
-                       dev->if_port = IF_PORT_10BASET;
-               }
-
-       }
-       else {
-               *link = 0;
-               *speed = 0;
-               dev->if_port = IF_PORT_UNKNOWN;
-       }
-       return 0;
-}
-
-int lsi_80227_init(struct net_device *dev, int phy_addr)
-{
-       if (au1000_debug > 4)
-               printk("lsi_80227_init\n");
-
-       /* restart auto-negotiation */
-       mdio_write(dev, phy_addr, MII_CONTROL,
-                  MII_CNTL_F100 | MII_CNTL_AUTO | MII_CNTL_RST_AUTO); // | MII_CNTL_FDX);
-       mdelay(1);
-
-       /* set up LEDs to correct display */
-#ifdef CONFIG_MIPS_MTX1
-       mdio_write(dev, phy_addr, 17, 0xff80);
-#else
-       mdio_write(dev, phy_addr, 17, 0xffc0);
-#endif
-
-       if (au1000_debug > 4)
-               dump_mii(dev, phy_addr);
-       return 0;
-}
-
-int lsi_80227_reset(struct net_device *dev, int phy_addr)
-{
-       s16 mii_control, timeout;
-       
-       if (au1000_debug > 4) {
-               printk("lsi_80227_reset\n");
-               dump_mii(dev, phy_addr);
-       }
-
-       mii_control = mdio_read(dev, phy_addr, MII_CONTROL);
-       mdio_write(dev, phy_addr, MII_CONTROL, mii_control | MII_CNTL_RESET);
-       mdelay(1);
-       for (timeout = 100; timeout > 0; --timeout) {
-               mii_control = mdio_read(dev, phy_addr, MII_CONTROL);
-               if ((mii_control & MII_CNTL_RESET) == 0)
-                       break;
-               mdelay(1);
-       }
-       if (mii_control & MII_CNTL_RESET) {
-               printk(KERN_ERR "%s PHY reset timeout !\n", dev->name);
-               return -1;
-       }
-       return 0;
-}
-
-int
-lsi_80227_status(struct net_device *dev, int phy_addr, u16 *link, u16 *speed)
-{
-       u16 mii_data;
-       struct au1000_private *aup;
-
-       if (!dev) {
-               printk(KERN_ERR "lsi_80227_status error: NULL dev\n");
-               return -1;
-       }
-       aup = (struct au1000_private *) dev->priv;
-
-       mii_data = mdio_read(dev, aup->phy_addr, MII_STATUS);
-       if (mii_data & MII_STAT_LINK) {
-               *link = 1;
-               mii_data = mdio_read(dev, aup->phy_addr, MII_LSI_PHY_STAT);
-               if (mii_data & MII_LSI_PHY_STAT_SPD) {
-                       if (mii_data & MII_LSI_PHY_STAT_FDX) {
-                               *speed = IF_PORT_100BASEFX;
-                               dev->if_port = IF_PORT_100BASEFX;
-                       }
-                       else {
-                               *speed = IF_PORT_100BASETX;
-                               dev->if_port = IF_PORT_100BASETX;
-                       }
-               }
-               else  {
-                       *speed = IF_PORT_10BASET;
-                       dev->if_port = IF_PORT_10BASET;
-               }
-
-       }
-       else {
-               *link = 0;
-               *speed = 0;
-               dev->if_port = IF_PORT_UNKNOWN;
-       }
-       return 0;
-}
-
-int am79c901_init(struct net_device *dev, int phy_addr)
-{
-       printk("am79c901_init\n");
-       return 0;
-}
-
-int am79c901_reset(struct net_device *dev, int phy_addr)
-{
-       printk("am79c901_reset\n");
-       return 0;
-}
-
-int 
-am79c901_status(struct net_device *dev, int phy_addr, u16 *link, u16 *speed)
-{
-       return 0;
-}
-
-int am79c874_init(struct net_device *dev, int phy_addr)
-{
-       s16 data;
-
-       /* 79c874 has quit resembled bit assignments to BCM5201 */
-       if (au1000_debug > 4)
-               printk("am79c847_init\n");
-
-       /* Stop auto-negotiation */
-       data = mdio_read(dev, phy_addr, MII_CONTROL);
-       mdio_write(dev, phy_addr, MII_CONTROL, data & ~MII_CNTL_AUTO);
-
-       /* Set advertisement to 10/100 and Half/Full duplex
-        * (full capabilities) */
-       data = mdio_read(dev, phy_addr, MII_ANADV);
-       data |= MII_NWAY_TX | MII_NWAY_TX_FDX | MII_NWAY_T_FDX | MII_NWAY_T;
-       mdio_write(dev, phy_addr, MII_ANADV, data);
-       
-       /* Restart auto-negotiation */
-       data = mdio_read(dev, phy_addr, MII_CONTROL);
-       data |= MII_CNTL_RST_AUTO | MII_CNTL_AUTO;
-
-       mdio_write(dev, phy_addr, MII_CONTROL, data);
-
-       if (au1000_debug > 4) dump_mii(dev, phy_addr);
-       return 0;
-}
-
-int am79c874_reset(struct net_device *dev, int phy_addr)
-{
-       s16 mii_control, timeout;
-       
-       if (au1000_debug > 4)
-               printk("am79c874_reset\n");
-
-       mii_control = mdio_read(dev, phy_addr, MII_CONTROL);
-       mdio_write(dev, phy_addr, MII_CONTROL, mii_control | MII_CNTL_RESET);
-       mdelay(1);
-       for (timeout = 100; timeout > 0; --timeout) {
-               mii_control = mdio_read(dev, phy_addr, MII_CONTROL);
-               if ((mii_control & MII_CNTL_RESET) == 0)
-                       break;
-               mdelay(1);
-       }
-       if (mii_control & MII_CNTL_RESET) {
-               printk(KERN_ERR "%s PHY reset timeout !\n", dev->name);
-               return -1;
-       }
-       return 0;
-}
-
-int 
-am79c874_status(struct net_device *dev, int phy_addr, u16 *link, u16 *speed)
-{
-       u16 mii_data;
-       struct au1000_private *aup;
-
-       // printk("am79c874_status\n");
-       if (!dev) {
-               printk(KERN_ERR "am79c874_status error: NULL dev\n");
-               return -1;
-       }
-
-       aup = (struct au1000_private *) dev->priv;
-       mii_data = mdio_read(dev, aup->phy_addr, MII_STATUS);
-
-       if (mii_data & MII_STAT_LINK) {
-               *link = 1;
-               mii_data = mdio_read(dev, aup->phy_addr, MII_AMD_PHY_STAT);
-               if (mii_data & MII_AMD_PHY_STAT_SPD) {
-                       if (mii_data & MII_AMD_PHY_STAT_FDX) {
-                               *speed = IF_PORT_100BASEFX;
-                               dev->if_port = IF_PORT_100BASEFX;
-                       }
-                       else {
-                               *speed = IF_PORT_100BASETX;
-                               dev->if_port = IF_PORT_100BASETX;
-                       }
-               }
-               else {
-                       *speed = IF_PORT_10BASET;
-                       dev->if_port = IF_PORT_10BASET;
-               }
-
-       }
-       else {
-               *link = 0;
-               *speed = 0;
-               dev->if_port = IF_PORT_UNKNOWN;
-       }
-       return 0;
-}
-
-int lxt971a_init(struct net_device *dev, int phy_addr)
-{
-       if (au1000_debug > 4)
-               printk("lxt971a_init\n");
-
-       /* restart auto-negotiation */
-       mdio_write(dev, phy_addr, MII_CONTROL,
-                  MII_CNTL_F100 | MII_CNTL_AUTO | MII_CNTL_RST_AUTO | MII_CNTL_FDX);
-
-       /* set up LEDs to correct display */
-       mdio_write(dev, phy_addr, 20, 0x0422);
-
-       if (au1000_debug > 4)
-               dump_mii(dev, phy_addr);
-       return 0;
-}
-
-int lxt971a_reset(struct net_device *dev, int phy_addr)
-{
-       s16 mii_control, timeout;
-       
-       if (au1000_debug > 4) {
-               printk("lxt971a_reset\n");
-               dump_mii(dev, phy_addr);
-       }
-
-       mii_control = mdio_read(dev, phy_addr, MII_CONTROL);
-       mdio_write(dev, phy_addr, MII_CONTROL, mii_control | MII_CNTL_RESET);
-       mdelay(1);
-       for (timeout = 100; timeout > 0; --timeout) {
-               mii_control = mdio_read(dev, phy_addr, MII_CONTROL);
-               if ((mii_control & MII_CNTL_RESET) == 0)
-                       break;
-               mdelay(1);
-       }
-       if (mii_control & MII_CNTL_RESET) {
-               printk(KERN_ERR "%s PHY reset timeout !\n", dev->name);
-               return -1;
-       }
-       return 0;
-}
-
-int
-lxt971a_status(struct net_device *dev, int phy_addr, u16 *link, u16 *speed)
-{
-       u16 mii_data;
-       struct au1000_private *aup;
-
-       if (!dev) {
-               printk(KERN_ERR "lxt971a_status error: NULL dev\n");
-               return -1;
-       }
-       aup = (struct au1000_private *) dev->priv;
-
-       mii_data = mdio_read(dev, aup->phy_addr, MII_STATUS);
-       if (mii_data & MII_STAT_LINK) {
-               *link = 1;
-               mii_data = mdio_read(dev, aup->phy_addr, MII_INTEL_PHY_STAT);
-               if (mii_data & MII_INTEL_PHY_STAT_SPD) {
-                       if (mii_data & MII_INTEL_PHY_STAT_FDX) {
-                               *speed = IF_PORT_100BASEFX;
-                               dev->if_port = IF_PORT_100BASEFX;
-                       }
-                       else {
-                               *speed = IF_PORT_100BASETX;
-                               dev->if_port = IF_PORT_100BASETX;
-                       }
-               }
-               else  {
-                       *speed = IF_PORT_10BASET;
-                       dev->if_port = IF_PORT_10BASET;
-               }
-
-       }
-       else {
-               *link = 0;
-               *speed = 0;
-               dev->if_port = IF_PORT_UNKNOWN;
-       }
-       return 0;
-}
-
-int ks8995m_init(struct net_device *dev, int phy_addr)
-{
-       s16 data;
-       
-//     printk("ks8995m_init\n");
-       /* Stop auto-negotiation */
-       data = mdio_read(dev, phy_addr, MII_CONTROL);
-       mdio_write(dev, phy_addr, MII_CONTROL, data & ~MII_CNTL_AUTO);
-
-       /* Set advertisement to 10/100 and Half/Full duplex
-        * (full capabilities) */
-       data = mdio_read(dev, phy_addr, MII_ANADV);
-       data |= MII_NWAY_TX | MII_NWAY_TX_FDX | MII_NWAY_T_FDX | MII_NWAY_T;
-       mdio_write(dev, phy_addr, MII_ANADV, data);
-       
-       /* Restart auto-negotiation */
-       data = mdio_read(dev, phy_addr, MII_CONTROL);
-       data |= MII_CNTL_RST_AUTO | MII_CNTL_AUTO;
-       mdio_write(dev, phy_addr, MII_CONTROL, data);
-
-       if (au1000_debug > 4) dump_mii(dev, phy_addr);
-
-       return 0;
-}
-
-int ks8995m_reset(struct net_device *dev, int phy_addr)
-{
-       s16 mii_control, timeout;
-       
-//     printk("ks8995m_reset\n");
-       mii_control = mdio_read(dev, phy_addr, MII_CONTROL);
-       mdio_write(dev, phy_addr, MII_CONTROL, mii_control | MII_CNTL_RESET);
-       mdelay(1);
-       for (timeout = 100; timeout > 0; --timeout) {
-               mii_control = mdio_read(dev, phy_addr, MII_CONTROL);
-               if ((mii_control & MII_CNTL_RESET) == 0)
-                       break;
-               mdelay(1);
-       }
-       if (mii_control & MII_CNTL_RESET) {
-               printk(KERN_ERR "%s PHY reset timeout !\n", dev->name);
-               return -1;
-       }
-       return 0;
-}
-
-int ks8995m_status(struct net_device *dev, int phy_addr, u16 *link, u16 *speed)
-{
-       u16 mii_data;
-       struct au1000_private *aup;
-
-       if (!dev) {
-               printk(KERN_ERR "ks8995m_status error: NULL dev\n");
-               return -1;
-       }
-       aup = (struct au1000_private *) dev->priv;
-
-       mii_data = mdio_read(dev, aup->phy_addr, MII_STATUS);
-       if (mii_data & MII_STAT_LINK) {
-               *link = 1;
-               mii_data = mdio_read(dev, aup->phy_addr, MII_AUX_CNTRL);
-               if (mii_data & MII_AUX_100) {
-                       if (mii_data & MII_AUX_FDX) {
-                               *speed = IF_PORT_100BASEFX;
-                               dev->if_port = IF_PORT_100BASEFX;
-                       }
-                       else {
-                               *speed = IF_PORT_100BASETX;
-                               dev->if_port = IF_PORT_100BASETX;
-                       }
-               }
-               else  {                                                                                 
-                       *speed = IF_PORT_10BASET;
-                       dev->if_port = IF_PORT_10BASET;
-               }
-
-       }
-       else {
-               *link = 0;
-               *speed = 0;
-               dev->if_port = IF_PORT_UNKNOWN;
-       }
-       return 0;
-}
-
-int
-smsc_83C185_init (struct net_device *dev, int phy_addr)
-{
-       s16 data;
-
-       if (au1000_debug > 4)
-               printk("smsc_83C185_init\n");
-
-       /* Stop auto-negotiation */
-       data = mdio_read(dev, phy_addr, MII_CONTROL);
-       mdio_write(dev, phy_addr, MII_CONTROL, data & ~MII_CNTL_AUTO);
-
-       /* Set advertisement to 10/100 and Half/Full duplex
-        * (full capabilities) */
-       data = mdio_read(dev, phy_addr, MII_ANADV);
-       data |= MII_NWAY_TX | MII_NWAY_TX_FDX | MII_NWAY_T_FDX | MII_NWAY_T;
-       mdio_write(dev, phy_addr, MII_ANADV, data);
-       
-       /* Restart auto-negotiation */
-       data = mdio_read(dev, phy_addr, MII_CONTROL);
-       data |= MII_CNTL_RST_AUTO | MII_CNTL_AUTO;
-
-       mdio_write(dev, phy_addr, MII_CONTROL, data);
-
-       if (au1000_debug > 4) dump_mii(dev, phy_addr);
-       return 0;
-}
-
-int
-smsc_83C185_reset (struct net_device *dev, int phy_addr)
-{
-       s16 mii_control, timeout;
-       
-       if (au1000_debug > 4)
-               printk("smsc_83C185_reset\n");
-
-       mii_control = mdio_read(dev, phy_addr, MII_CONTROL);
-       mdio_write(dev, phy_addr, MII_CONTROL, mii_control | MII_CNTL_RESET);
-       mdelay(1);
-       for (timeout = 100; timeout > 0; --timeout) {
-               mii_control = mdio_read(dev, phy_addr, MII_CONTROL);
-               if ((mii_control & MII_CNTL_RESET) == 0)
-                       break;
-               mdelay(1);
-       }
-       if (mii_control & MII_CNTL_RESET) {
-               printk(KERN_ERR "%s PHY reset timeout !\n", dev->name);
-               return -1;
-       }
-       return 0;
-}
-
-int 
-smsc_83C185_status (struct net_device *dev, int phy_addr, u16 *link, u16 *speed)
-{
-       u16 mii_data;
-       struct au1000_private *aup;
-
-       if (!dev) {
-               printk(KERN_ERR "smsc_83C185_status error: NULL dev\n");
-               return -1;
-       }
-
-       aup = (struct au1000_private *) dev->priv;
-       mii_data = mdio_read(dev, aup->phy_addr, MII_STATUS);
-
-       if (mii_data & MII_STAT_LINK) {
-               *link = 1;
-               mii_data = mdio_read(dev, aup->phy_addr, 0x1f);
-               if (mii_data & (1<<3)) {
-                       if (mii_data & (1<<4)) {
-                               *speed = IF_PORT_100BASEFX;
-                               dev->if_port = IF_PORT_100BASEFX;
-                       }
-                       else {
-                               *speed = IF_PORT_100BASETX;
-                               dev->if_port = IF_PORT_100BASETX;
-                       }
-               }
-               else {
-                       *speed = IF_PORT_10BASET;
-                       dev->if_port = IF_PORT_10BASET;
-               }
-       }
-       else {
-               *link = 0;
-               *speed = 0;
-               dev->if_port = IF_PORT_UNKNOWN;
-       }
-       return 0;
-}
-
-
-#ifdef CONFIG_MIPS_BOSPORUS
-int stub_init(struct net_device *dev, int phy_addr)
-{
-       //printk("PHY stub_init\n");
-       return 0;
-}
+/* autodetection defaults */
+#undef  AU1XXX_PHY_SEARCH_HIGHEST_ADDR
+#define AU1XXX_PHY1_SEARCH_ON_MAC0
 
-int stub_reset(struct net_device *dev, int phy_addr)
-{
-       //printk("PHY stub_reset\n");
-       return 0;
-}
-
-int 
-stub_status(struct net_device *dev, int phy_addr, u16 *link, u16 *speed)
-{
-       //printk("PHY stub_status\n");
-       *link = 1;
-       /* hmmm, revisit */
-       *speed = IF_PORT_100BASEFX;
-       dev->if_port = IF_PORT_100BASEFX;
-       return 0;
-}
-#endif
-
-struct phy_ops bcm_5201_ops = {
-       bcm_5201_init,
-       bcm_5201_reset,
-       bcm_5201_status,
-};
-
-struct phy_ops am79c874_ops = {
-       am79c874_init,
-       am79c874_reset,
-       am79c874_status,
-};
-
-struct phy_ops am79c901_ops = {
-       am79c901_init,
-       am79c901_reset,
-       am79c901_status,
-};
-
-struct phy_ops lsi_80227_ops = { 
-       lsi_80227_init,
-       lsi_80227_reset,
-       lsi_80227_status,
-};
-
-struct phy_ops lxt971a_ops = { 
-       lxt971a_init,
-       lxt971a_reset,
-       lxt971a_status,
-};
+/* static PHY setup
+ *
+ * most boards PHY setup should be detectable properly with the
+ * autodetection algorithm in mii_probe(), but in some cases (e.g. if
+ * you have a switch attached, or want to use the PHY's interrupt
+ * notification capabilities) you can provide a static PHY
+ * configuration here
+ *
+ * IRQs may only be set, if a PHY address was configured
+ * If a PHY address is given, also a bus id is required to be set
+ *
+ * ps: make sure the used irqs are configured properly in the board
+ * specific irq-map
+ */
 
-struct phy_ops ks8995m_ops = {
-       ks8995m_init,
-       ks8995m_reset,
-       ks8995m_status,
-};
+#if defined(CONFIG_MIPS_BOSPORUS)
+/*
+ * Micrel/Kendin 5 port switch attached to MAC0,
+ * MAC0 is associated with PHY address 5 (== WAN port)
+ * MAC1 is not associated with any PHY, since it's connected directly
+ * to the switch.
+ * no interrupts are used
+ */
+# define AU1XXX_PHY_STATIC_CONFIG
 
-struct phy_ops smsc_83C185_ops = {
-       smsc_83C185_init,
-       smsc_83C185_reset,
-       smsc_83C185_status,
-};
+# define AU1XXX_PHY0_ADDR  5
+# define AU1XXX_PHY0_BUSID 0
+#  undef AU1XXX_PHY0_IRQ
 
-#ifdef CONFIG_MIPS_BOSPORUS
-struct phy_ops stub_ops = {
-       stub_init,
-       stub_reset,
-       stub_status,
-};
+#  undef AU1XXX_PHY1_ADDR
+#  undef AU1XXX_PHY1_BUSID
+#  undef AU1XXX_PHY1_IRQ
 #endif
 
-static struct mii_chip_info {
-       const char * name;
-       u16 phy_id0;
-       u16 phy_id1;
-       struct phy_ops *phy_ops;        
-       int dual_phy;
-} mii_chip_table[] = {
-       {"Broadcom BCM5201 10/100 BaseT PHY",0x0040,0x6212, &bcm_5201_ops,0},
-       {"Broadcom BCM5221 10/100 BaseT PHY",0x0040,0x61e4, &bcm_5201_ops,0},
-       {"Broadcom BCM5222 10/100 BaseT PHY",0x0040,0x6322, &bcm_5201_ops,1},
-       {"NS DP83847 PHY", 0x2000, 0x5c30, &bcm_5201_ops ,0},
-       {"AMD 79C901 HomePNA PHY",0x0000,0x35c8, &am79c901_ops,0},
-       {"AMD 79C874 10/100 BaseT PHY",0x0022,0x561b, &am79c874_ops,0},
-       {"LSI 80227 10/100 BaseT PHY",0x0016,0xf840, &lsi_80227_ops,0},
-       {"Intel LXT971A Dual Speed PHY",0x0013,0x78e2, &lxt971a_ops,0},
-       {"Kendin KS8995M 10/100 BaseT PHY",0x0022,0x1450, &ks8995m_ops,0},
-       {"SMSC LAN83C185 10/100 BaseT PHY",0x0007,0xc0a3, &smsc_83C185_ops,0},
-#ifdef CONFIG_MIPS_BOSPORUS
-       {"Stub", 0x1234, 0x5678, &stub_ops },
+#if defined(AU1XXX_PHY0_BUSID) && (AU1XXX_PHY0_BUSID > 0)
+# error MAC0-associated PHY attached 2nd MACs MII bus not supported yet
 #endif
-       {0,},
-};
 
-static int mdio_read(struct net_device *dev, int phy_id, int reg)
+/*
+ * MII operations
+ */
+static int mdio_read(struct net_device *dev, int phy_addr, int reg)
 {
        struct au1000_private *aup = (struct au1000_private *) dev->priv;
-       volatile u32 *mii_control_reg;
-       volatile u32 *mii_data_reg;
+       volatile u32 *const mii_control_reg = &aup->mac->mii_control;
+       volatile u32 *const mii_data_reg = &aup->mac->mii_data;
        u32 timedout = 20;
        u32 mii_control;
 
-       #ifdef CONFIG_BCM5222_DUAL_PHY
-       /* First time we probe, it's for the mac0 phy.
-        * Since we haven't determined yet that we have a dual phy,
-        * aup->mii->mii_control_reg won't be setup and we'll
-        * default to the else statement.
-        * By the time we probe for the mac1 phy, the mii_control_reg
-        * will be setup to be the address of the mac0 phy control since
-        * both phys are controlled through mac0.
-        */
-       if (aup->mii && aup->mii->mii_control_reg) {
-               mii_control_reg = aup->mii->mii_control_reg;
-               mii_data_reg = aup->mii->mii_data_reg;
-       }
-       else if (au_macs[0]->mii && au_macs[0]->mii->mii_control_reg) {
-               /* assume both phys are controlled through mac0 */
-               mii_control_reg = au_macs[0]->mii->mii_control_reg;
-               mii_data_reg = au_macs[0]->mii->mii_data_reg;
-       }
-       else 
-       #endif
-       {
-               /* default control and data reg addresses */
-               mii_control_reg = &aup->mac->mii_control;
-               mii_data_reg = &aup->mac->mii_data;
-       }
-
        while (*mii_control_reg & MAC_MII_BUSY) {
                mdelay(1);
                if (--timedout == 0) {
@@ -835,7 +215,7 @@ static int mdio_read(struct net_device *dev, int phy_id, int reg)
        }
 
        mii_control = MAC_SET_MII_SELECT_REG(reg) | 
-               MAC_SET_MII_SELECT_PHY(phy_id) | MAC_MII_READ;
+               MAC_SET_MII_SELECT_PHY(phy_addr) | MAC_MII_READ;
 
        *mii_control_reg = mii_control;
 
@@ -851,32 +231,14 @@ static int mdio_read(struct net_device *dev, int phy_id, int reg)
        return (int)*mii_data_reg;
 }
 
-static void mdio_write(struct net_device *dev, int phy_id, int reg, u16 value)
+static void mdio_write(struct net_device *dev, int phy_addr, int reg, u16 value)
 {
        struct au1000_private *aup = (struct au1000_private *) dev->priv;
-       volatile u32 *mii_control_reg;
-       volatile u32 *mii_data_reg;
+       volatile u32 *const mii_control_reg = &aup->mac->mii_control;
+       volatile u32 *const mii_data_reg = &aup->mac->mii_data;
        u32 timedout = 20;
        u32 mii_control;
 
-       #ifdef CONFIG_BCM5222_DUAL_PHY
-       if (aup->mii && aup->mii->mii_control_reg) {
-               mii_control_reg = aup->mii->mii_control_reg;
-               mii_data_reg = aup->mii->mii_data_reg;
-       }
-       else if (au_macs[0]->mii && au_macs[0]->mii->mii_control_reg) {
-               /* assume both phys are controlled through mac0 */
-               mii_control_reg = au_macs[0]->mii->mii_control_reg;
-               mii_data_reg = au_macs[0]->mii->mii_data_reg;
-       }
-       else 
-       #endif
-       {
-               /* default control and data reg addresses */
-               mii_control_reg = &aup->mac->mii_control;
-               mii_data_reg = &aup->mac->mii_data;
-       }
-
        while (*mii_control_reg & MAC_MII_BUSY) {
                mdelay(1);
                if (--timedout == 0) {
@@ -887,165 +249,145 @@ static void mdio_write(struct net_device *dev, int phy_id, int reg, u16 value)
        }
 
        mii_control = MAC_SET_MII_SELECT_REG(reg) | 
-               MAC_SET_MII_SELECT_PHY(phy_id) | MAC_MII_WRITE;
+               MAC_SET_MII_SELECT_PHY(phy_addr) | MAC_MII_WRITE;
 
        *mii_data_reg = value;
        *mii_control_reg = mii_control;
 }
 
+static int mdiobus_read(struct mii_bus *bus, int phy_addr, int regnum)
+{
+       /* WARNING: bus->phy_map[phy_addr].attached_dev == dev does
+        * _NOT_ hold (e.g. when PHY is accessed through other MAC's MII bus) */
+       struct net_device *const dev = bus->priv;
+
+       enable_mac(dev, 0); /* make sure the MAC associated with this
+                            * mii_bus is enabled */
+       return mdio_read(dev, phy_addr, regnum);
+}
 
-static void dump_mii(struct net_device *dev, int phy_id)
+static int mdiobus_write(struct mii_bus *bus, int phy_addr, int regnum,
+                        u16 value)
 {
-       int i, val;
+       struct net_device *const dev = bus->priv;
 
-       for (i = 0; i < 7; i++) {
-               if ((val = mdio_read(dev, phy_id, i)) >= 0)
-                       printk("%s: MII Reg %d=%x\n", dev->name, i, val);
-       }
-       for (i = 16; i < 25; i++) {
-               if ((val = mdio_read(dev, phy_id, i)) >= 0)
-                       printk("%s: MII Reg %d=%x\n", dev->name, i, val);
-       }
+       enable_mac(dev, 0); /* make sure the MAC associated with this
+                            * mii_bus is enabled */
+       mdio_write(dev, phy_addr, regnum, value);
+       return 0;
 }
 
-static int mii_probe (struct net_device * dev)
+static int mdiobus_reset(struct mii_bus *bus)
 {
-       struct au1000_private *aup = (struct au1000_private *) dev->priv;
-       int phy_addr;
-#ifdef CONFIG_MIPS_BOSPORUS
-       int phy_found=0;
-#endif
+       struct net_device *const dev = bus->priv;
 
-       /* search for total of 32 possible mii phy addresses */
-       for (phy_addr = 0; phy_addr < 32; phy_addr++) {
-               u16 mii_status;
-               u16 phy_id0, phy_id1;
-               int i;
+       enable_mac(dev, 0); /* make sure the MAC associated with this
+                            * mii_bus is enabled */
+       return 0;
+}
 
-               #ifdef CONFIG_BCM5222_DUAL_PHY
-               /* Mask the already found phy, try next one */
-               if (au_macs[0]->mii && au_macs[0]->mii->mii_control_reg) {
-                       if (au_macs[0]->phy_addr == phy_addr)
-                               continue;
-               }
-               #endif
-
-               mii_status = mdio_read(dev, phy_addr, MII_STATUS);
-               if (mii_status == 0xffff || mii_status == 0x0000)
-                       /* the mii is not accessable, try next one */
-                       continue;
-
-               phy_id0 = mdio_read(dev, phy_addr, MII_PHY_ID0);
-               phy_id1 = mdio_read(dev, phy_addr, MII_PHY_ID1);
-
-               /* search our mii table for the current mii */ 
-               for (i = 0; mii_chip_table[i].phy_id1; i++) {
-                       if (phy_id0 == mii_chip_table[i].phy_id0 &&
-                           phy_id1 == mii_chip_table[i].phy_id1) {
-                               struct mii_phy * mii_phy = aup->mii;
-
-                               printk(KERN_INFO "%s: %s at phy address %d\n",
-                                      dev->name, mii_chip_table[i].name, 
-                                      phy_addr);
-#ifdef CONFIG_MIPS_BOSPORUS
-                               phy_found = 1;
-#endif
-                               mii_phy->chip_info = mii_chip_table+i;
-                               aup->phy_addr = phy_addr;
-                               aup->want_autoneg = 1;
-                               aup->phy_ops = mii_chip_table[i].phy_ops;
-                               aup->phy_ops->phy_init(dev,phy_addr);
-
-                               // Check for dual-phy and then store required 
-                               // values and set indicators. We need to do 
-                               // this now since mdio_{read,write} need the 
-                               // control and data register addresses.
-                               #ifdef CONFIG_BCM5222_DUAL_PHY
-                               if ( mii_chip_table[i].dual_phy) {
-
-                                       /* assume both phys are controlled 
-                                        * through MAC0. Board specific? */
-                                       
-                                       /* sanity check */
-                                       if (!au_macs[0] || !au_macs[0]->mii)
-                                               return -1;
-                                       aup->mii->mii_control_reg = (u32 *)
-                                               &au_macs[0]->mac->mii_control;
-                                       aup->mii->mii_data_reg = (u32 *)
-                                               &au_macs[0]->mac->mii_data;
-                               }
-                               #endif
-                               goto found;
-                       }
+static int mii_probe (struct net_device *dev)
+{
+       struct au1000_private *const aup = (struct au1000_private *) dev->priv;
+       struct phy_device *phydev = NULL;
+
+#if defined(AU1XXX_PHY_STATIC_CONFIG)
+       BUG_ON(aup->mac_id < 0 || aup->mac_id > 1);
+
+       if(aup->mac_id == 0) { /* get PHY0 */
+# if defined(AU1XXX_PHY0_ADDR)
+               phydev = au_macs[AU1XXX_PHY0_BUSID]->mii_bus.phy_map[AU1XXX_PHY0_ADDR];
+# else
+               printk (KERN_INFO DRV_NAME ":%s: using PHY-less setup\n",
+                       dev->name);
+               return 0;
+# endif /* defined(AU1XXX_PHY0_ADDR) */
+       } else if (aup->mac_id == 1) { /* get PHY1 */
+# if defined(AU1XXX_PHY1_ADDR)
+               phydev = au_macs[AU1XXX_PHY1_BUSID]->mii_bus.phy_map[AU1XXX_PHY1_ADDR];
+# else
+               printk (KERN_INFO DRV_NAME ":%s: using PHY-less setup\n",
+                       dev->name);
+               return 0;
+# endif /* defined(AU1XXX_PHY1_ADDR) */
+       }
+
+#else /* defined(AU1XXX_PHY_STATIC_CONFIG) */
+       int phy_addr;
+
+       /* find the first (lowest address) PHY on the current MAC's MII bus */
+       for (phy_addr = 0; phy_addr < PHY_MAX_ADDR; phy_addr++)
+               if (aup->mii_bus.phy_map[phy_addr]) {
+                       phydev = aup->mii_bus.phy_map[phy_addr];
+# if !defined(AU1XXX_PHY_SEARCH_HIGHEST_ADDR)
+                       break; /* break out with first one found */
+# endif
                }
-       }
-found:
-
-#ifdef CONFIG_MIPS_BOSPORUS
-       /* This is a workaround for the Micrel/Kendin 5 port switch
-          The second MAC doesn't see a PHY connected... so we need to
-          trick it into thinking we have one.
-               
-          If this kernel is run on another Au1500 development board
-          the stub will be found as well as the actual PHY. However,
-          the last found PHY will be used... usually at Addr 31 (Db1500).      
-       */
-       if ( (!phy_found) )
-       {
-               u16 phy_id0, phy_id1;
-               int i;
 
-               phy_id0 = 0x1234;
-               phy_id1 = 0x5678;
-
-               /* search our mii table for the current mii */ 
-               for (i = 0; mii_chip_table[i].phy_id1; i++) {
-                       if (phy_id0 == mii_chip_table[i].phy_id0 &&
-                           phy_id1 == mii_chip_table[i].phy_id1) {
-                               struct mii_phy * mii_phy;
-
-                               printk(KERN_INFO "%s: %s at phy address %d\n",
-                                      dev->name, mii_chip_table[i].name, 
-                                      phy_addr);
-                               mii_phy = kmalloc(sizeof(struct mii_phy), 
-                                               GFP_KERNEL);
-                               if (mii_phy) {
-                                       mii_phy->chip_info = mii_chip_table+i;
-                                       aup->phy_addr = phy_addr;
-                                       mii_phy->next = aup->mii;
-                                       aup->phy_ops = 
-                                               mii_chip_table[i].phy_ops;
-                                       aup->mii = mii_phy;
-                                       aup->phy_ops->phy_init(dev,phy_addr);
-                               } else {
-                                       printk(KERN_ERR "%s: out of memory\n", 
-                                                       dev->name);
-                                       return -1;
-                               }
-                               mii_phy->chip_info = mii_chip_table+i;
-                               aup->phy_addr = phy_addr;
-                               aup->phy_ops = mii_chip_table[i].phy_ops;
-                               aup->phy_ops->phy_init(dev,phy_addr);
-                               break;
-                       }
+# if defined(AU1XXX_PHY1_SEARCH_ON_MAC0)
+       /* try harder to find a PHY */
+       if (!phydev && (aup->mac_id == 1)) {
+               /* no PHY found, maybe we have a dual PHY? */
+               printk (KERN_INFO DRV_NAME ": no PHY found on MAC1, "
+                       "let's see if it's attached to MAC0...\n");
+
+               BUG_ON(!au_macs[0]);
+
+               /* find the first (lowest address) non-attached PHY on
+                * the MAC0 MII bus */
+               for (phy_addr = 0; phy_addr < PHY_MAX_ADDR; phy_addr++) {
+                       struct phy_device *const tmp_phydev =
+                               au_macs[0]->mii_bus.phy_map[phy_addr];
+
+                       if (!tmp_phydev)
+                               continue; /* no PHY here... */
+
+                       if (tmp_phydev->attached_dev)
+                               continue; /* already claimed by MAC0 */
+
+                       phydev = tmp_phydev;
+                       break; /* found it */
                }
        }
-       if (aup->mac_id == 0) {
-               /* the Bosporus phy responds to addresses 0-5 but 
-                * 5 is the correct one.
-                */
-               aup->phy_addr = 5;
-       }
-#endif
+# endif /* defined(AU1XXX_PHY1_SEARCH_OTHER_BUS) */
 
-       if (aup->mii->chip_info == NULL) {
-               printk(KERN_ERR "%s: Au1x No known MII transceivers found!\n",
-                               dev->name);
+#endif /* defined(AU1XXX_PHY_STATIC_CONFIG) */
+       if (!phydev) {
+               printk (KERN_ERR DRV_NAME ":%s: no PHY found\n", dev->name);
                return -1;
        }
 
-       printk(KERN_INFO "%s: Using %s as default\n", 
-                       dev->name, aup->mii->chip_info->name);
+       /* now we are supposed to have a proper phydev, to attach to... */
+       BUG_ON(!phydev);
+       BUG_ON(phydev->attached_dev);
+
+       phydev = phy_connect(dev, phydev->dev.bus_id, &au1000_adjust_link, 0);
+
+       if (IS_ERR(phydev)) {
+               printk(KERN_ERR "%s: Could not attach to PHY\n", dev->name);
+               return PTR_ERR(phydev);
+       }
+
+       /* mask with MAC supported features */
+       phydev->supported &= (SUPPORTED_10baseT_Half
+                             | SUPPORTED_10baseT_Full
+                             | SUPPORTED_100baseT_Half
+                             | SUPPORTED_100baseT_Full
+                             | SUPPORTED_Autoneg
+                             /* | SUPPORTED_Pause | SUPPORTED_Asym_Pause */
+                             | SUPPORTED_MII
+                             | SUPPORTED_TP);
+
+       phydev->advertising = phydev->supported;
+
+       aup->old_link = 0;
+       aup->old_speed = 0;
+       aup->old_duplex = -1;
+       aup->phy_dev = phydev;
+
+       printk(KERN_INFO "%s: attached PHY driver [%s] "
+              "(mii_bus:phy_addr=%s, irq=%d)\n",
+              dev->name, phydev->drv->name, phydev->dev.bus_id, phydev->irq);
 
        return 0;
 }
@@ -1097,35 +439,38 @@ static void hard_stop(struct net_device *dev)
        au_sync_delay(10);
 }
 
-
-static void reset_mac(struct net_device *dev)
+static void enable_mac(struct net_device *dev, int force_reset)
 {
-       int i;
-       u32 flags;
+       unsigned long flags;
        struct au1000_private *aup = (struct au1000_private *) dev->priv;
 
-       if (au1000_debug > 4) 
-               printk(KERN_INFO "%s: reset mac, aup %x\n", 
-                               dev->name, (unsigned)aup);
-
        spin_lock_irqsave(&aup->lock, flags);
-       if (aup->timer.function == &au1000_timer) {/* check if timer initted */
-               del_timer(&aup->timer);
-       }
 
-       hard_stop(dev);
-       #ifdef CONFIG_BCM5222_DUAL_PHY
-       if (aup->mac_id != 0) {
-       #endif
-               /* If BCM5222, we can't leave MAC0 in reset because then 
-                * we can't access the dual phy for ETH1 */
+       if(force_reset || (!aup->mac_enabled)) {
                *aup->enable = MAC_EN_CLOCK_ENABLE;
                au_sync_delay(2);
-               *aup->enable = 0;
+               *aup->enable = (MAC_EN_RESET0 | MAC_EN_RESET1 | MAC_EN_RESET2
+                               | MAC_EN_CLOCK_ENABLE);
                au_sync_delay(2);
-       #ifdef CONFIG_BCM5222_DUAL_PHY
+
+               aup->mac_enabled = 1;
        }
-       #endif
+
+       spin_unlock_irqrestore(&aup->lock, flags);
+}
+
+static void reset_mac_unlocked(struct net_device *dev)
+{
+       struct au1000_private *const aup = (struct au1000_private *) dev->priv;
+       int i;
+
+       hard_stop(dev);
+
+       *aup->enable = MAC_EN_CLOCK_ENABLE;
+       au_sync_delay(2);
+       *aup->enable = 0;
+       au_sync_delay(2);
+
        aup->tx_full = 0;
        for (i = 0; i < NUM_RX_DMA; i++) {
                /* reset control bits */
@@ -1135,9 +480,26 @@ static void reset_mac(struct net_device *dev)
                /* reset control bits */
                aup->tx_dma_ring[i]->buff_stat &= ~0xf;
        }
-       spin_unlock_irqrestore(&aup->lock, flags);
+
+       aup->mac_enabled = 0;
+
 }
 
+static void reset_mac(struct net_device *dev)
+{
+       struct au1000_private *const aup = (struct au1000_private *) dev->priv;
+       unsigned long flags;
+
+       if (au1000_debug > 4)
+               printk(KERN_INFO "%s: reset mac, aup %x\n",
+                      dev->name, (unsigned)aup);
+
+       spin_lock_irqsave(&aup->lock, flags);
+
+       reset_mac_unlocked (dev);
+
+       spin_unlock_irqrestore(&aup->lock, flags);
+}
 
 /* 
  * Setup the receive and transmit "rings".  These pointers are the addresses
@@ -1160,12 +522,27 @@ setup_hw_rings(struct au1000_private *aup, u32 rx_base, u32 tx_base)
 }
 
 static struct {
-       int port;
        u32 base_addr;
        u32 macen_addr;
        int irq;
        struct net_device *dev;
-} iflist[2];
+} iflist[2] = {
+#ifdef CONFIG_SOC_AU1000
+       {AU1000_ETH0_BASE, AU1000_MAC0_ENABLE, AU1000_MAC0_DMA_INT},
+       {AU1000_ETH1_BASE, AU1000_MAC1_ENABLE, AU1000_MAC1_DMA_INT}
+#endif
+#ifdef CONFIG_SOC_AU1100
+       {AU1100_ETH0_BASE, AU1100_MAC0_ENABLE, AU1100_MAC0_DMA_INT}
+#endif
+#ifdef CONFIG_SOC_AU1500
+       {AU1500_ETH0_BASE, AU1500_MAC0_ENABLE, AU1500_MAC0_DMA_INT},
+       {AU1500_ETH1_BASE, AU1500_MAC1_ENABLE, AU1500_MAC1_DMA_INT}
+#endif
+#ifdef CONFIG_SOC_AU1550
+       {AU1550_ETH0_BASE, AU1550_MAC0_ENABLE, AU1550_MAC0_DMA_INT},
+       {AU1550_ETH1_BASE, AU1550_MAC1_ENABLE, AU1550_MAC1_DMA_INT}
+#endif
+};
 
 static int num_ifs;
 
@@ -1176,58 +553,14 @@ static int num_ifs;
  */
 static int __init au1000_init_module(void)
 {
-       struct cpuinfo_mips *c = &current_cpu_data;
        int ni = (int)((au_readl(SYS_PINFUNC) & (u32)(SYS_PF_NI2)) >> 4);
        struct net_device *dev;
        int i, found_one = 0;
 
-      &n