[NET]: Make packet reception network namespace safe
[linux-2.6.git] / drivers / net / bonding / bond_main.c
index 3d7693d..e4e5fdc 100644 (file)
@@ -35,7 +35,6 @@
 
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/sched.h>
 #include <linux/types.h>
 #include <linux/fcntl.h>
 #include <linux/interrupt.h>
@@ -61,6 +60,7 @@
 #include <linux/errno.h>
 #include <linux/netdevice.h>
 #include <linux/inetdevice.h>
+#include <linux/igmp.h>
 #include <linux/etherdevice.h>
 #include <linux/skbuff.h>
 #include <net/sock.h>
@@ -75,6 +75,7 @@
 #include <linux/if_vlan.h>
 #include <linux/if_bonding.h>
 #include <net/route.h>
+#include <net/net_namespace.h>
 #include "bonding.h"
 #include "bond_3ad.h"
 #include "bond_alb.h"
@@ -96,6 +97,7 @@ static char *lacp_rate        = NULL;
 static char *xmit_hash_policy = NULL;
 static int arp_interval = BOND_LINK_ARP_INTERV;
 static char *arp_ip_target[BOND_MAX_ARP_TARGETS] = { NULL, };
+static char *arp_validate = NULL;
 struct bond_params bonding_defaults;
 
 module_param(max_bonds, int, 0);
@@ -127,6 +129,8 @@ module_param(arp_interval, int, 0);
 MODULE_PARM_DESC(arp_interval, "arp interval in milliseconds");
 module_param_array(arp_ip_target, charp, NULL, 0);
 MODULE_PARM_DESC(arp_ip_target, "arp targets in n.n.n.n form");
+module_param(arp_validate, charp, 0);
+MODULE_PARM_DESC(arp_validate, "validate src/dst of ARP probes: none (default), active, backup or all");
 
 /*----------------------------- Global variables ----------------------------*/
 
@@ -170,13 +174,21 @@ struct bond_parm_tbl xmit_hashtype_tbl[] = {
 {      NULL,                   -1},
 };
 
+/*
+ * Name -> value table for the arp_validate module parameter; handed to
+ * bond_parse_parm() from bond_check_params().  bond_check_params() also
+ * uses the parsed value as an index back into this table to print the
+ * mode name, so each entry presumably has to sit at the index equal to
+ * its BOND_ARP_VALIDATE_* value (constants are defined elsewhere --
+ * TODO confirm they are 0..3 in this order).
+ */
+struct bond_parm_tbl arp_validate_tbl[] = {
+{      "none",                 BOND_ARP_VALIDATE_NONE},
+{      "active",               BOND_ARP_VALIDATE_ACTIVE},
+{      "backup",               BOND_ARP_VALIDATE_BACKUP},
+{      "all",                  BOND_ARP_VALIDATE_ALL},
+{      NULL,                   -1},
+};
+
 /*-------------------------- Forward declarations ---------------------------*/
 
 static void bond_send_gratuitous_arp(struct bonding *bond);
 
 /*---------------------------- General routines -----------------------------*/
 
-const char *bond_mode_name(int mode)
+static const char *bond_mode_name(int mode)
 {
        switch (mode) {
        case BOND_MODE_ROUNDROBIN :
@@ -478,9 +490,9 @@ static void bond_vlan_rx_kill_vid(struct net_device *bond_dev, uint16_t vid)
                        /* Save and then restore vlan_dev in the grp array,
                         * since the slave's driver might clear it.
                         */
-                       vlan_dev = bond->vlgrp->vlan_devices[vid];
+                       vlan_dev = vlan_group_get_device(bond->vlgrp, vid);
                        slave_dev->vlan_rx_kill_vid(slave_dev, vid);
-                       bond->vlgrp->vlan_devices[vid] = vlan_dev;
+                       vlan_group_set_device(bond->vlgrp, vid, vlan_dev);
                }
        }
 
@@ -540,9 +552,9 @@ static void bond_del_vlans_from_slave(struct bonding *bond, struct net_device *s
                /* Save and then restore vlan_dev in the grp array,
                 * since the slave's driver might clear it.
                 */
-               vlan_dev = bond->vlgrp->vlan_devices[vlan->vlan_id];
+               vlan_dev = vlan_group_get_device(bond->vlgrp, vlan->vlan_id);
                slave_dev->vlan_rx_kill_vid(slave_dev, vlan->vlan_id);
-               bond->vlgrp->vlan_devices[vlan->vlan_id] = vlan_dev;
+               vlan_group_set_device(bond->vlgrp, vlan->vlan_id, vlan_dev);
        }
 
 unreg:
@@ -602,38 +614,20 @@ down:
 static int bond_update_speed_duplex(struct slave *slave)
 {
        struct net_device *slave_dev = slave->dev;
-       static int (* ioctl)(struct net_device *, struct ifreq *, int);
-       struct ifreq ifr;
        struct ethtool_cmd etool;
+       int res;
 
        /* Fake speed and duplex */
        slave->speed = SPEED_100;
        slave->duplex = DUPLEX_FULL;
 
-       if (slave_dev->ethtool_ops) {
-               int res;
-
-               if (!slave_dev->ethtool_ops->get_settings) {
-                       return -1;
-               }
-
-               res = slave_dev->ethtool_ops->get_settings(slave_dev, &etool);
-               if (res < 0) {
-                       return -1;
-               }
-
-               goto verify;
-       }
+       if (!slave_dev->ethtool_ops || !slave_dev->ethtool_ops->get_settings)
+               return -1;
 
-       ioctl = slave_dev->do_ioctl;
-       strncpy(ifr.ifr_name, slave_dev->name, IFNAMSIZ);
-       etool.cmd = ETHTOOL_GSET;
-       ifr.ifr_data = (char*)&etool;
-       if (!ioctl || (IOCTL(slave_dev, &ifr, SIOCETHTOOL) < 0)) {
+       res = slave_dev->ethtool_ops->get_settings(slave_dev, &etool);
+       if (res < 0)
                return -1;
-       }
 
-verify:
        switch (etool.speed) {
        case SPEED_10:
        case SPEED_100:
@@ -679,7 +673,6 @@ static int bond_check_dev_link(struct bonding *bond, struct net_device *slave_de
        static int (* ioctl)(struct net_device *, struct ifreq *, int);
        struct ifreq ifr;
        struct mii_ioctl_data *mii;
-       struct ethtool_value etool;
 
        if (bond->params.use_carrier) {
                return netif_carrier_ok(slave_dev) ? BMSR_LSTATUS : 0;
@@ -710,9 +703,10 @@ static int bond_check_dev_link(struct bonding *bond, struct net_device *slave_de
                }
        }
 
-       /* try SIOCETHTOOL ioctl, some drivers cache ETHTOOL_GLINK */
-       /* for a period of time so we attempt to get link status   */
-       /* from it last if the above MII ioctls fail...            */
+       /*
+        * Some drivers cache ETHTOOL_GLINK for a period of time so we only
+        * attempt to get link status from it if the above MII ioctls fail.
+        */
        if (slave_dev->ethtool_ops) {
                if (slave_dev->ethtool_ops->get_link) {
                        u32 link;
@@ -723,23 +717,9 @@ static int bond_check_dev_link(struct bonding *bond, struct net_device *slave_de
                }
        }
 
-       if (ioctl) {
-               strncpy(ifr.ifr_name, slave_dev->name, IFNAMSIZ);
-               etool.cmd = ETHTOOL_GLINK;
-               ifr.ifr_data = (char*)&etool;
-               if (IOCTL(slave_dev, &ifr, SIOCETHTOOL) == 0) {
-                       if (etool.data == 1) {
-                               return BMSR_LSTATUS;
-                       } else {
-                               dprintk("SIOCETHTOOL shows link down\n");
-                               return 0;
-                       }
-               }
-       }
-
        /*
         * If reporting, report that either there's no dev->do_ioctl,
-        * or both SIOCGMIIREG and SIOCETHTOOL failed (meaning that we
+        * or both SIOCGMIIREG and get_link failed (meaning that we
         * cannot report link status).  If not reporting, pretend
         * we're ok.
         */
@@ -851,6 +831,28 @@ static void bond_mc_delete(struct bonding *bond, void *addr, int alen)
        }
 }
 
+
+/*
+ * Walk the IPv4 multicast memberships recorded on the bonding device and
+ * call ip_mc_rejoin_group() on each of them.  bond_mc_swap() invokes this
+ * after it has added the bond's multicast addresses to the new active
+ * slave.
+ */
+static void bond_resend_igmp_join_requests(struct bonding *bond)
+{
+       struct in_device *idev;
+       struct ip_mc_list *entry;
+
+       /* Both the in_device lookup and the list walk stay inside RCU. */
+       rcu_read_lock();
+
+       idev = __in_dev_get_rcu(bond->dev);
+       for (entry = idev ? idev->mc_list : NULL; entry; entry = entry->next)
+               ip_mc_rejoin_group(entry);
+
+       rcu_read_unlock();
+}
+
 /*
  * Totally destroys the mc_list in bond
  */
@@ -864,6 +866,7 @@ static void bond_mc_list_destroy(struct bonding *bond)
                kfree(dmi);
                dmi = bond->mc_list;
        }
+        bond->mc_list = NULL;
 }
 
 /*
@@ -957,6 +960,7 @@ static void bond_mc_swap(struct bonding *bond, struct slave *new_active, struct
                for (dmi = bond->dev->mc_list; dmi; dmi = dmi->next) {
                        dev_mc_add(new_active->dev, dmi->dmi_addr, dmi->dmi_addrlen, 0);
                }
+               bond_resend_igmp_join_requests(bond);
        }
 }
 
@@ -1189,7 +1193,8 @@ static void bond_detach_slave(struct bonding *bond, struct slave *slave)
 
 /*---------------------------------- IOCTL ----------------------------------*/
 
-int bond_sethwaddr(struct net_device *bond_dev, struct net_device *slave_dev)
+static int bond_sethwaddr(struct net_device *bond_dev,
+                         struct net_device *slave_dev)
 {
        dprintk("bond_dev=%p\n", bond_dev);
        dprintk("slave_dev=%p\n", slave_dev);
@@ -1198,40 +1203,37 @@ int bond_sethwaddr(struct net_device *bond_dev, struct net_device *slave_dev)
        return 0;
 }
 
-#define BOND_INTERSECT_FEATURES \
-       (NETIF_F_SG | NETIF_F_ALL_CSUM | NETIF_F_TSO | NETIF_F_UFO)
+#define BOND_VLAN_FEATURES \
+       (NETIF_F_VLAN_CHALLENGED | NETIF_F_HW_VLAN_RX | NETIF_F_HW_VLAN_TX | \
+        NETIF_F_HW_VLAN_FILTER)
 
 /* 
  * Compute the common dev->feature set available to all slaves.  Some
- * feature bits are managed elsewhere, so preserve feature bits set on
- * master device that are not part of the examined set.
+ * feature bits are managed elsewhere, so preserve those feature bits
+ * on the master device.
  */
 static int bond_compute_features(struct bonding *bond)
 {
-       unsigned long features = BOND_INTERSECT_FEATURES;
        struct slave *slave;
        struct net_device *bond_dev = bond->dev;
+       unsigned long features = bond_dev->features;
+       unsigned short max_hard_header_len = ETH_HLEN;
        int i;
 
-       bond_for_each_slave(bond, slave, i)
-               features &= (slave->dev->features & BOND_INTERSECT_FEATURES);
-
-       if ((features & NETIF_F_SG) && 
-           !(features & NETIF_F_ALL_CSUM))
-               features &= ~NETIF_F_SG;
-
-       /* 
-        * features will include NETIF_F_TSO (NETIF_F_UFO) iff all 
-        * slave devices support NETIF_F_TSO (NETIF_F_UFO), which 
-        * implies that all slaves also support scatter-gather 
-        * (NETIF_F_SG), which implies that features also includes 
-        * NETIF_F_SG. So no need to check whether we have an  
-        * illegal combination of NETIF_F_{TSO,UFO} and 
-        * !NETIF_F_SG 
-        */
+       features &= ~(NETIF_F_ALL_CSUM | BOND_VLAN_FEATURES);
+       features |= NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA |
+                   NETIF_F_GSO_MASK | NETIF_F_NO_CSUM;
+
+       bond_for_each_slave(bond, slave, i) {
+               features = netdev_compute_features(features,
+                                                  slave->dev->features);
+               if (slave->dev->hard_header_len > max_hard_header_len)
+                       max_hard_header_len = slave->dev->hard_header_len;
+       }
 
-       features |= (bond_dev->features & ~BOND_INTERSECT_FEATURES);
+       features |= (bond_dev->features & BOND_VLAN_FEATURES);
        bond_dev->features = features;
+       bond_dev->hard_header_len = max_hard_header_len;
 
        return 0;
 }
@@ -1320,14 +1322,12 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
                goto err_undo_flags;
        }
 
-       new_slave = kmalloc(sizeof(struct slave), GFP_KERNEL);
+       new_slave = kzalloc(sizeof(struct slave), GFP_KERNEL);
        if (!new_slave) {
                res = -ENOMEM;
                goto err_undo_flags;
        }
 
-       memset(new_slave, 0, sizeof(struct slave));
-
        /* save slave's original flags before calling
         * netdev_set_master and dev_open
         */
@@ -1352,6 +1352,11 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
                goto err_free;
        }
 
+       res = netdev_set_master(slave_dev, bond_dev);
+       if (res) {
+               dprintk("Error %d calling netdev_set_master\n", res);
+               goto err_close;
+       }
        /* open the slave since the application closed it */
        res = dev_open(slave_dev);
        if (res) {
@@ -1359,13 +1364,8 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
                goto err_restore_mac;
        }
 
-       res = netdev_set_master(slave_dev, bond_dev);
-       if (res) {
-               dprintk("Error %d calling netdev_set_master\n", res);
-               goto err_close;
-       }
-
        new_slave->dev = slave_dev;
+       slave_dev->priv_flags |= IFF_BONDING;
 
        if ((bond->params.mode == BOND_MODE_TLB) ||
            (bond->params.mode == BOND_MODE_ALB)) {
@@ -1418,6 +1418,8 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
 
        bond_compute_features(bond);
 
+       new_slave->last_arp_rx = jiffies;
+
        if (bond->params.miimon && !bond->params.use_carrier) {
                link_reporting = bond_check_dev_link(bond, slave_dev, 1);
 
@@ -1494,29 +1496,8 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
 
        switch (bond->params.mode) {
        case BOND_MODE_ACTIVEBACKUP:
-               /* if we're in active-backup mode, we need one and
-                * only one active interface. The backup interfaces
-                * will have their SLAVE_INACTIVE flag set because we
-                * need them to be drop all packets. Thus, since we
-                * guarantee that curr_active_slave always point to
-                * the last usable interface, we just have to verify
-                * this interface's flag.
-                */
-               if (((!bond->curr_active_slave) ||
-                    (bond->curr_active_slave->dev->priv_flags & IFF_SLAVE_INACTIVE)) &&
-                   (new_slave->link != BOND_LINK_DOWN)) {
-                       /* first slave or no active slave yet, and this link
-                          is OK, so make this interface the active one */
-                       bond_change_active_slave(bond, new_slave);
-                       printk(KERN_INFO DRV_NAME
-                              ": %s: first active interface up!\n",
-                              bond->dev->name);
-                       netif_carrier_on(bond->dev);
-
-               } else {
-                       dprintk("This is just a backup slave\n");
-                       bond_set_slave_inactive_flags(new_slave);
-               }
+               bond_set_slave_inactive_flags(new_slave);
+               bond_select_active_slave(bond);
                break;
        case BOND_MODE_8023AD:
                /* in 802.3ad mode, the internal mechanism
@@ -1779,7 +1760,8 @@ int bond_release(struct net_device *bond_dev, struct net_device *slave_dev)
        dev_set_mac_address(slave_dev, &addr);
 
        slave_dev->priv_flags &= ~(IFF_MASTER_8023AD | IFF_MASTER_ALB |
-                                  IFF_SLAVE_INACTIVE);
+                                  IFF_SLAVE_INACTIVE | IFF_BONDING |
+                                  IFF_SLAVE_NEEDARP);
 
        kfree(slave);
 
@@ -2253,7 +2235,7 @@ static u32 bond_glean_dev_ip(struct net_device *dev)
 {
        struct in_device *idev;
        struct in_ifaddr *ifa;
-       u32 addr = 0;
+       __be32 addr = 0;
 
        if (!dev)
                return 0;
@@ -2292,6 +2274,25 @@ static int bond_has_ip(struct bonding *bond)
        return 0;
 }
 
+/*
+ * Return 1 if @ip equals bond->master_ip or the vlan_ip of any entry on
+ * bond->vlan_list, 0 otherwise.
+ */
+static int bond_has_this_ip(struct bonding *bond, u32 ip)
+{
+       struct vlan_entry *entry;
+
+       if (bond->master_ip == ip)
+               return 1;
+
+       /* Read-only walk; an empty list simply falls through to 0. */
+       list_for_each_entry(entry, &bond->vlan_list, vlan_list) {
+               if (entry->vlan_ip == ip)
+                       return 1;
+       }
+
+       return 0;
+}
+
 /*
  * We go to the (large) trouble of VLAN tagging ARP frames because
  * switches in VLAN mode (especially if ports are configured as
@@ -2375,7 +2376,7 @@ static void bond_arp_send_all(struct bonding *bond, struct slave *slave)
                vlan_id = 0;
                list_for_each_entry_safe(vlan, vlan_next, &bond->vlan_list,
                                         vlan_list) {
-                       vlan_dev = bond->vlgrp->vlan_devices[vlan->vlan_id];
+                       vlan_dev = vlan_group_get_device(bond->vlgrp, vlan->vlan_id);
                        if (vlan_dev == rt->u.dst.dev) {
                                vlan_id = vlan->vlan_id;
                                dprintk("basa: vlan match on %s %d\n",
@@ -2422,7 +2423,7 @@ static void bond_send_gratuitous_arp(struct bonding *bond)
        }
 
        list_for_each_entry(vlan, &bond->vlan_list, vlan_list) {
-               vlan_dev = bond->vlgrp->vlan_devices[vlan->vlan_id];
+               vlan_dev = vlan_group_get_device(bond->vlgrp, vlan->vlan_id);
                if (vlan->vlan_ip) {
                        bond_arp_send(slave->dev, ARPOP_REPLY, vlan->vlan_ip,
                                      vlan->vlan_ip, vlan->vlan_id);
@@ -2430,6 +2431,96 @@ static void bond_send_gratuitous_arp(struct bonding *bond)
        }
 }
 
+/*
+ * Record an ARP as received on @slave when it matches the monitor setup.
+ *
+ * Scans bond->params.arp_targets (stopping at the first zero entry or
+ * after BOND_MAX_ARP_TARGETS entries) for one equal to @sip.  The search
+ * ends at the first match; slave->last_arp_rx is set to jiffies only if
+ * bond_has_this_ip(bond, @tip) is true.  Without a match the slave is left
+ * untouched.
+ */
+static void bond_validate_arp(struct bonding *bond, struct slave *slave, u32 sip, u32 tip)
+{
+       int i;
+       u32 *targets = bond->params.arp_targets;
+
+       for (i = 0; (i < BOND_MAX_ARP_TARGETS) && targets[i]; i++) {
+               dprintk("bva: sip %u.%u.%u.%u tip %u.%u.%u.%u t[%d] "
+                       "%u.%u.%u.%u bhti(tip) %d\n",
+                      NIPQUAD(sip), NIPQUAD(tip), i, NIPQUAD(targets[i]),
+                      bond_has_this_ip(bond, tip));
+               if (sip == targets[i]) {
+                       if (bond_has_this_ip(bond, tip))
+                               slave->last_arp_rx = jiffies;
+                       return;
+               }
+       }
+}
+
+/*
+ * Packet handler that bond_register_arp() installs for ETH_P_ARP frames on
+ * the bonding device.
+ *
+ * If the frame came in through one of the bond's slaves (@orig_dev) and
+ * slave_do_arp_validate() is non-zero for that slave, the sender and target
+ * IPv4 addresses are copied out of the ARP payload and passed to
+ * bond_validate_arp().  The skb is freed and NET_RX_SUCCESS is returned on
+ * every path.
+ */
+static int bond_arp_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
+{
+       struct arphdr *arp;
+       struct slave *slave;
+       struct bonding *bond;
+       unsigned char *arp_ptr;
+       u32 sip, tip;
+
+       /* Only devices in the initial network namespace are handled. */
+       if (dev->nd_net != &init_net)
+               goto out;
+
+       /* Ignore anything that is not flagged as a bonding master. */
+       if (!(dev->priv_flags & IFF_BONDING) || !(dev->flags & IFF_MASTER))
+               goto out;
+
+       /* bond->lock is held for reading from here until out_unlock. */
+       bond = dev->priv;
+       read_lock(&bond->lock);
+
+       dprintk("bond_arp_rcv: bond %s skb->dev %s orig_dev %s\n",
+               bond->dev->name, skb->dev ? skb->dev->name : "NULL",
+               orig_dev ? orig_dev->name : "NULL");
+
+       slave = bond_get_slave_by_dev(bond, orig_dev);
+       if (!slave || !slave_do_arp_validate(bond, slave))
+               goto out_unlock;
+
+       /* ARP header, plus 2 device addresses, plus 2 IP addresses.  */
+       if (!pskb_may_pull(skb, (sizeof(struct arphdr) +
+                                (2 * dev->addr_len) +
+                                (2 * sizeof(u32)))))
+               goto out_unlock;
+
+       /*
+        * Accept only Ethernet/IPv4 ARP whose hardware address length
+        * matches the device, and skip frames marked as being for another
+        * host or as loopback.
+        */
+       arp = arp_hdr(skb);
+       if (arp->ar_hln != dev->addr_len ||
+           skb->pkt_type == PACKET_OTHERHOST ||
+           skb->pkt_type == PACKET_LOOPBACK ||
+           arp->ar_hrd != htons(ARPHRD_ETHER) ||
+           arp->ar_pro != htons(ETH_P_IP) ||
+           arp->ar_pln != 4)
+               goto out_unlock;
+
+       /*
+        * Payload after the fixed header, as walked below: sender hardware
+        * address, sender IP (4 bytes), target hardware address, target IP
+        * (4 bytes).
+        */
+       arp_ptr = (unsigned char *)(arp + 1);
+       arp_ptr += dev->addr_len;
+       memcpy(&sip, arp_ptr, 4);
+       arp_ptr += 4 + dev->addr_len;
+       memcpy(&tip, arp_ptr, 4);
+
+       dprintk("bond_arp_rcv: %s %s/%d av %d sv %d sip %u.%u.%u.%u"
+               " tip %u.%u.%u.%u\n", bond->dev->name, slave->dev->name,
+               slave->state, bond->params.arp_validate,
+               slave_do_arp_validate(bond, slave), NIPQUAD(sip), NIPQUAD(tip));
+
+       /*
+        * Backup slaves won't see the ARP reply, but do come through
+        * here for each ARP probe (so we swap the sip/tip to validate
+        * the probe).  In a "redundant switch, common router" type of
+        * configuration, the ARP probe will (hopefully) travel from
+        * the active, through one switch, the router, then the other
+        * switch before reaching the backup.
+        */
+       if (slave->state == BOND_STATE_ACTIVE)
+               bond_validate_arp(bond, slave, sip, tip);
+       else
+               bond_validate_arp(bond, slave, tip, sip);
+
+out_unlock:
+       read_unlock(&bond->lock);
+out:
+       /* Every path ends here: the handler always consumes the skb. */
+       dev_kfree_skb(skb);
+       return NET_RX_SUCCESS;
+}
+
 /*
  * this function is called regularly to monitor each slave's link
  * ensuring that traffic is being sent and received when arp monitoring
@@ -2594,7 +2685,8 @@ void bond_activebackup_arp_mon(struct net_device *bond_dev)
         */
        bond_for_each_slave(bond, slave, i) {
                if (slave->link != BOND_LINK_UP) {
-                       if ((jiffies - slave->dev->last_rx) <= delta_in_ticks) {
+                       if ((jiffies - slave_last_rx(bond, slave)) <=
+                            delta_in_ticks) {
 
                                slave->link = BOND_LINK_UP;
 
@@ -2639,7 +2731,7 @@ void bond_activebackup_arp_mon(struct net_device *bond_dev)
 
                        if ((slave != bond->curr_active_slave) &&
                            (!bond->current_arp_slave) &&
-                           (((jiffies - slave->dev->last_rx) >= 3*delta_in_ticks) &&
+                           (((jiffies - slave_last_rx(bond, slave)) >= 3*delta_in_ticks) &&
                             bond_has_ip(bond))) {
                                /* a backup slave has gone down; three times
                                 * the delta allows the current slave to be
@@ -2686,7 +2778,7 @@ void bond_activebackup_arp_mon(struct net_device *bond_dev)
                 * if it is up and needs to take over as the curr_active_slave
                 */
                if ((((jiffies - slave->dev->trans_start) >= (2*delta_in_ticks)) ||
-           (((jiffies - slave->dev->last_rx) >= (2*delta_in_ticks)) &&
+           (((jiffies - slave_last_rx(bond, slave)) >= (2*delta_in_ticks)) &&
             bond_has_ip(bond))) &&
                    ((jiffies - slave->jiffies) >= 2*delta_in_ticks)) {
 
@@ -3009,7 +3101,7 @@ static int bond_info_open(struct inode *inode, struct file *file)
        return res;
 }
 
-static struct file_operations bond_info_fops = {
+static const struct file_operations bond_info_fops = {
        .owner   = THIS_MODULE,
        .open    = bond_info_open,
        .read    = seq_read,
@@ -3056,7 +3148,7 @@ static void bond_create_proc_dir(void)
 {
        int len = strlen(DRV_NAME);
 
-       for (bond_proc_dir = proc_net->subdir; bond_proc_dir;
+       for (bond_proc_dir = init_net.proc_net->subdir; bond_proc_dir;
             bond_proc_dir = bond_proc_dir->next) {
                if ((bond_proc_dir->namelen == len) &&
                    !memcmp(bond_proc_dir->name, DRV_NAME, len)) {
@@ -3065,7 +3157,7 @@ static void bond_create_proc_dir(void)
        }
 
        if (!bond_proc_dir) {
-               bond_proc_dir = proc_mkdir(DRV_NAME, proc_net);
+               bond_proc_dir = proc_mkdir(DRV_NAME, init_net.proc_net);
                if (bond_proc_dir) {
                        bond_proc_dir->owner = THIS_MODULE;
                } else {
@@ -3100,7 +3192,7 @@ static void bond_destroy_proc_dir(void)
                        bond_proc_dir->owner = NULL;
                }
        } else {
-               remove_proc_entry(DRV_NAME, proc_net);
+               remove_proc_entry(DRV_NAME, init_net.proc_net);
                bond_proc_dir = NULL;
        }
 }
@@ -3211,6 +3303,9 @@ static int bond_netdev_event(struct notifier_block *this, unsigned long event, v
                (event_dev ? event_dev->name : "None"),
                event);
 
+       if (!(event_dev->priv_flags & IFF_BONDING))
+               return NOTIFY_DONE;
+
        if (event_dev->flags & IFF_MASTER) {
                dprintk("IFF_MASTER\n");
                return bond_master_netdev_event(event, event_dev);
@@ -3258,7 +3353,7 @@ static int bond_inetaddr_event(struct notifier_block *this, unsigned long event,
 
                list_for_each_entry_safe(vlan, vlan_next, &bond->vlan_list,
                                         vlan_list) {
-                       vlan_dev = bond->vlgrp->vlan_devices[vlan->vlan_id];
+                       vlan_dev = vlan_group_get_device(bond->vlgrp, vlan->vlan_id);
                        if (vlan_dev == event_dev) {
                                switch (event) {
                                case NETDEV_UP:
@@ -3306,10 +3401,31 @@ static void bond_unregister_lacpdu(struct bonding *bond)
        dev_remove_pack(&(BOND_AD_INFO(bond).ad_pkt_type));
 }
 
+/*
+ * Hook bond_arp_rcv() up as the ETH_P_ARP packet handler for the bonding
+ * device.  A non-zero arp_mon_pt.type marks the handler as already added,
+ * in which case nothing is done.
+ */
+void bond_register_arp(struct bonding *bond)
+{
+       struct packet_type *handler = &bond->arp_mon_pt;
+
+       if (!handler->type) {
+               handler->func = bond_arp_rcv;
+               handler->dev = bond->dev;
+               handler->type = htons(ETH_P_ARP);
+               dev_add_pack(handler);
+       }
+}
+
+/*
+ * Undo bond_register_arp(): remove the ARP packet handler and clear
+ * arp_mon_pt.type so that a later bond_register_arp() adds it again.
+ *
+ * bond_register_arp() treats a non-zero type as "already added", so a zero
+ * type here means there is no handler to remove.  Return early in that case
+ * rather than passing dev_remove_pack() a packet_type that was never added.
+ */
+void bond_unregister_arp(struct bonding *bond)
+{
+       struct packet_type *pt = &bond->arp_mon_pt;
+
+       if (!pt->type)
+               return;
+
+       dev_remove_pack(pt);
+       pt->type = 0;
+}
+
 /*---------------------------- Hashing Policies -----------------------------*/
 
 /*
- * Hash for the the output device based upon layer 3 and layer 4 data. If
+ * Hash for the output device based upon layer 3 and layer 4 data. If
  * the packet is a frag or not TCP or UDP, just use layer 3 data.  If it is
  * altogether not IP, mimic bond_xmit_hash_policy_l2()
  */
@@ -3317,7 +3433,7 @@ static int bond_xmit_hash_policy_l34(struct sk_buff *skb,
                                    struct net_device *bond_dev, int count)
 {
        struct ethhdr *data = (struct ethhdr *)skb->data;
-       struct iphdr *iph = skb->nh.iph;
+       struct iphdr *iph = ip_hdr(skb);
        u16 *layer4hdr = (u16 *)((u32 *)iph + iph->ihl);
        int layer4_xor = 0;
 
@@ -3392,6 +3508,9 @@ static int bond_open(struct net_device *bond_dev)
                } else {
                        arp_timer->function = (void *)&bond_loadbalance_arp_mon;
                }
+               if (bond->params.arp_validate)
+                       bond_register_arp(bond);
+
                add_timer(arp_timer);
        }
 
@@ -3419,9 +3538,11 @@ static int bond_close(struct net_device *bond_dev)
                bond_unregister_lacpdu(bond);
        }
 
+       if (bond->params.arp_validate)
+               bond_unregister_arp(bond);
+
        write_lock_bh(&bond->lock);
 
-       bond_mc_list_destroy(bond);
 
        /* signal timers not to re-arm */
        bond->kill_timers = 1;
@@ -3452,8 +3573,6 @@ static int bond_close(struct net_device *bond_dev)
                break;
        }
 
-       /* Release the bonded slaves */
-       bond_release_all(bond_dev);
 
        if ((bond->params.mode == BOND_MODE_TLB) ||
            (bond->params.mode == BOND_MODE_ALB)) {
@@ -3479,7 +3598,6 @@ static struct net_device_stats *bond_get_stats(struct net_device *bond_dev)
 
        bond_for_each_slave(bond, slave, i) {
                sstats = slave->dev->get_stats(slave->dev);
-
                stats->rx_packets += sstats->rx_packets;
                stats->rx_bytes += sstats->rx_bytes;
                stats->rx_errors += sstats->rx_errors;
@@ -3548,7 +3666,7 @@ static int bond_do_ioctl(struct net_device *bond_dev, struct ifreq *ifr, int cmd
                        mii->val_out = 0;
                        read_lock_bh(&bond->lock);
                        read_lock(&bond->curr_slave_lock);
-                       if (bond->curr_active_slave) {
+                       if (netif_carrier_ok(bond->dev)) {
                                mii->val_out = BMSR_LSTATUS;
                        }
                        read_unlock(&bond->curr_slave_lock);
@@ -3878,42 +3996,6 @@ out:
        return 0;
 }
 
-static void bond_activebackup_xmit_copy(struct sk_buff *skb,
-                                        struct bonding *bond,
-                                        struct slave *slave)
-{
-       struct sk_buff *skb2 = skb_copy(skb, GFP_ATOMIC);
-       struct ethhdr *eth_data;
-       u8 *hwaddr;
-       int res;
-
-       if (!skb2) {
-               printk(KERN_ERR DRV_NAME ": Error: "
-                      "bond_activebackup_xmit_copy(): skb_copy() failed\n");
-               return;
-       }
-
-       skb2->mac.raw = (unsigned char *)skb2->data;
-       eth_data = eth_hdr(skb2);
-
-       /* Pick an appropriate source MAC address
-        *      -- use slave's perm MAC addr, unless used by bond
-        *      -- otherwise, borrow active slave's perm MAC addr
-        *         since that will not be used
-        */
-       hwaddr = slave->perm_hwaddr;
-       if (!memcmp(eth_data->h_source, hwaddr, ETH_ALEN))
-               hwaddr = bond->curr_active_slave->perm_hwaddr;
-
-       /* Set source MAC address appropriately */
-       memcpy(eth_data->h_source, hwaddr, ETH_ALEN);
-
-       res = bond_dev_queue_xmit(bond, skb2, slave->dev);
-       if (res)
-               dev_kfree_skb(skb2);
-
-       return;
-}
 
 /*
  * in active-backup mode, we know that bond->curr_active_slave is always valid if
@@ -3934,21 +4016,6 @@ static int bond_xmit_activebackup(struct sk_buff *skb, struct net_device *bond_d
        if (!bond->curr_active_slave)
                goto out;
 
-       /* Xmit IGMP frames on all slaves to ensure rapid fail-over
-          for multicast traffic on snooping switches */
-       if (skb->protocol == __constant_htons(ETH_P_IP) &&
-           skb->nh.iph->protocol == IPPROTO_IGMP) {
-               struct slave *slave, *active_slave;
-               int i;
-
-               active_slave = bond->curr_active_slave;
-               bond_for_each_slave_from_to(bond, slave, i, active_slave->next,
-                                           active_slave->prev)
-                       if (IS_UP(slave->dev) &&
-                           (slave->link == BOND_LINK_UP))
-                               bond_activebackup_xmit_copy(skb, bond, slave);
-       }
-
        res = bond_dev_queue_xmit(bond, skb, bond->curr_active_slave->dev);
 
 out:
@@ -4180,6 +4247,7 @@ static int bond_init(struct net_device *bond_dev, struct bond_params *params)
        /* Initialize the device options */
        bond_dev->tx_queue_len = 0;
        bond_dev->flags |= IFF_MASTER|IFF_MULTICAST;
+       bond_dev->priv_flags |= IFF_BONDING;
 
        /* At first, we block adding VLANs. That's the only way to
         * prevent problems that occur when adding VLANs over an
@@ -4238,8 +4306,11 @@ static void bond_free_all(void)
        list_for_each_entry_safe(bond, nxt, &bond_dev_list, bond_list) {
                struct net_device *bond_dev = bond->dev;
 
-               unregister_netdevice(bond_dev);
+               bond_mc_list_destroy(bond);
+               /* Release the bonded slaves */
+               bond_release_all(bond_dev);
                bond_deinit(bond_dev);
+               unregister_netdevice(bond_dev);
        }
 
 #ifdef CONFIG_PROC_FS
@@ -4271,6 +4342,8 @@ int bond_parse_parm(char *mode_arg, struct bond_parm_tbl *tbl)
 
 static int bond_check_params(struct bond_params *params)
 {
+       int arp_validate_value;
+
        /*
         * Convert string parameters.
         */
@@ -4474,6 +4547,29 @@ static int bond_check_params(struct bond_params *params)
                arp_interval = 0;
        }
 
+       if (arp_validate) {
+               if (bond_mode != BOND_MODE_ACTIVEBACKUP) {
+                       printk(KERN_ERR DRV_NAME
+              ": arp_validate only supported in active-backup mode\n");
+                       return -EINVAL;
+               }
+               if (!arp_interval) {
+                       printk(KERN_ERR DRV_NAME
+                              ": arp_validate requires arp_interval\n");
+                       return -EINVAL;
+               }
+
+               arp_validate_value = bond_parse_parm(arp_validate,
+                                                    arp_validate_tbl);
+               if (arp_validate_value == -1) {
+                       printk(KERN_ERR DRV_NAME
+                              ": Error: invalid arp_validate \"%s\"\n",
+                              arp_validate == NULL ? "NULL" : arp_validate);
+                       return -EINVAL;
+               }
+       } else
+               arp_validate_value = 0;
+
        if (miimon) {
                printk(KERN_INFO DRV_NAME
                       ": MII link monitoring set to %d ms\n",
@@ -4482,8 +4578,10 @@ static int bond_check_params(struct bond_params *params)
                int i;
 
                printk(KERN_INFO DRV_NAME
-                      ": ARP monitoring set to %d ms with %d target(s):",
-                      arp_interval, arp_ip_count);
+                      ": ARP monitoring set to %d ms, validate %s, with %d target(s):",
+                      arp_interval,
+                      arp_validate_tbl[arp_validate_value].modename,
+                      arp_ip_count);
 
                for (i = 0; i < arp_ip_count; i++)
                        printk (" %s", arp_ip_target[i]);
@@ -4517,6 +4615,7 @@ static int bond_check_params(struct bond_params *params)
        params->xmit_policy = xmit_hashtype;
        params->miimon = miimon;
        params->arp_interval = arp_interval;
+       params->arp_validate = arp_validate_value;
        params->updelay = updelay;
        params->downdelay = downdelay;
        params->use_carrier = use_carrier;
@@ -4533,7 +4632,10 @@ static int bond_check_params(struct bond_params *params)
        return 0;
 }
 
+static struct lock_class_key bonding_netdev_xmit_lock_key;
+
 /* Create a new bond based on the specified name and bonding parameters.
+ * If name is NULL, obtain a suitable "bond%d" name for us.
  * Caller must NOT hold rtnl_lock; we need to release it here before we
  * set up our sysfs entries.
  */
@@ -4543,7 +4645,8 @@ int bond_create(char *name, struct bond_params *params, struct bonding **newbond
        int res;
 
        rtnl_lock();
-       bond_dev = alloc_netdev(sizeof(struct bonding), name, ether_setup);
+       bond_dev = alloc_netdev(sizeof(struct bonding), name ? name : "",
+                               ether_setup);
        if (!bond_dev) {
                printk(KERN_ERR DRV_NAME
                       ": %s: eek! can't alloc netdev!\n",
@@ -4552,6 +4655,12 @@ int bond_create(char *name, struct bond_params *params, struct bonding **newbond
                goto out_rtnl;
        }
 
+       if (!name) {
+               res = dev_alloc_name(bond_dev, "bond%d");
+               if (res < 0)
+                       goto out_netdev;
+       }
+
        /* bond_init() must be called after dev_alloc_name() (for the
         * /proc files), but before register_netdevice(), because we
         * need to set function pointers.
@@ -4568,6 +4677,9 @@ int bond_create(char *name, struct bond_params *params, struct bonding **newbond
        if (res < 0) {
                goto out_bond;
        }
+
+       lockdep_set_class(&bond_dev->_xmit_lock, &bonding_netdev_xmit_lock_key);
+
        if (newbond)
                *newbond = bond_dev->priv;
 
@@ -4575,14 +4687,19 @@ int bond_create(char *name, struct bond_params *params, struct bonding **newbond
 
        rtnl_unlock(); /* allows sysfs registration of net device */
        res = bond_create_sysfs_entry(bond_dev->priv);
-       goto done;
+       if (res < 0) {
+               rtnl_lock();
+               goto out_bond;
+       }
+
+       return 0;
+
 out_bond:
        bond_deinit(bond_dev);
 out_netdev:
        free_netdev(bond_dev);
 out_rtnl:
        rtnl_unlock();
-done:
        return res;
 }
 
@@ -4590,7 +4707,6 @@ static int __init bonding_init(void)
 {
        int i;
        int res;
-       char new_bond_name[8];  /* Enough room for 999 bonds at init. */
 
        printk(KERN_INFO "%s", version);
 
@@ -4603,8 +4719,7 @@ static int __init bonding_init(void)
        bond_create_proc_dir();
 #endif
        for (i = 0; i < max_bonds; i++) {
-               sprintf(new_bond_name, "bond%d",i);
-               res = bond_create(new_bond_name,&bonding_defaults, NULL);
+               res = bond_create(NULL, &bonding_defaults, NULL);
                if (res)
                        goto err;
        }