nv-tegra.nvidia Code Review - linux-2.6.git/blobdiff - drivers/block/aoe/aoecmd.c
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc-2.6
[linux-2.6.git] / drivers / block / aoe / aoecmd.c
index 63c456082d8114f63d01b9f71241aafc6486ef69..64a223b0cc2229fb14250dd60fd572b8f8bd9c59 100644 (file)
@@ -1,51 +1,50 @@
-/* Copyright (c) 2006 Coraid, Inc.  See COPYING for GPL terms. */
+/* Copyright (c) 2007 Coraid, Inc.  See COPYING for GPL terms. */
 /*
  * aoecmd.c
  * Filesystem request handling methods
  */
 
+#include <linux/ata.h>
 #include <linux/hdreg.h>
 #include <linux/blkdev.h>
 #include <linux/skbuff.h>
 #include <linux/netdevice.h>
 #include <linux/genhd.h>
+#include <linux/moduleparam.h>
+#include <net/net_namespace.h>
 #include <asm/unaligned.h>
 #include "aoe.h"
 
-#define TIMERTICK (HZ / 10)
-#define MINTIMER (2 * TIMERTICK)
-#define MAXTIMER (HZ << 1)
-#define MAXWAIT (60 * 3)       /* After MAXWAIT seconds, give up and fail dev */
+static int aoe_deadsecs = 60 * 3;
+module_param(aoe_deadsecs, int, 0644);
+MODULE_PARM_DESC(aoe_deadsecs, "After aoe_deadsecs seconds, give up and fail dev.");
 
-struct sk_buff *
+static int aoe_maxout = 16;
+module_param(aoe_maxout, int, 0644);
+MODULE_PARM_DESC(aoe_maxout,
+       "Only aoe_maxout outstanding packets for every MAC on eX.Y.");
+/*
+ * new_skb: allocate an skb with room for len bytes using GFP_ATOMIC.
+ * On success the mac and network header offsets are reset and the
+ * protocol is set to ETH_P_AOE.  With this change the skb is returned
+ * without any data put into it (the old skb_put/memset lines are
+ * removed below), so callers do their own skb_put.
+ * Returns NULL when alloc_skb fails.
+ */
+static struct sk_buff *
 new_skb(ulong len)
 {
        struct sk_buff *skb;
 
        skb = alloc_skb(len, GFP_ATOMIC);
        if (skb) {
-               skb->nh.raw = skb->mac.raw = skb->data;
+               skb_reset_mac_header(skb);
+               skb_reset_network_header(skb);
                skb->protocol = __constant_htons(ETH_P_AOE);
-               skb->priority = 0;
-               skb_put(skb, len);
-               memset(skb->head, 0, len);
-               skb->next = skb->prev = NULL;
-
-               /* tell the network layer not to perform IP checksums
-                * or to get the NIC to do it
-                */
-               skb->ip_summed = CHECKSUM_NONE;
        }
        return skb;
 }
 
 static struct frame *
-getframe(struct aoedev *d, int tag)
+getframe(struct aoetgt *t, int tag)
 {
        struct frame *f, *e;
 
-       f = d->frames;
-       e = f + d->nframes;
+       f = t->frames;
+       e = f + t->nframes;
        for (; f<e; f++)
                if (f->tag == tag)
                        return f;
@@ -58,21 +57,21 @@ getframe(struct aoedev *d, int tag)
  * This driver reserves tag -1 to mean "unused frame."
  */
 static int
-newtag(struct aoedev *d)
+newtag(struct aoetgt *t)
 {      /* build a fresh tag; after this change the sequence counter is per target */
        register ulong n;
 
        n = jiffies & 0xffff;   /* low 16 bits: current jiffies value */
-       return n |= (++d->lasttag & 0x7fff) << 16;
+       return n |= (++t->lasttag & 0x7fff) << 16;      /* bits 16-30: pre-incremented counter, 15 bits; bit 31 stays clear so the result is never -1 */
 }
 
 static int
-aoehdr_atainit(struct aoedev *d, struct aoe_hdr *h)
+aoehdr_atainit(struct aoedev *d, struct aoetgt *t, struct aoe_hdr *h)
 {
-       u32 host_tag = newtag(d);
+       u32 host_tag = newtag(t);
 
-       memcpy(h->src, d->ifp->dev_addr, sizeof h->src);
-       memcpy(h->dst, d->addr, sizeof h->dst);
+       memcpy(h->src, t->ifp->nd->dev_addr, sizeof h->src);
+       memcpy(h->dst, t->addr, sizeof h->dst);
        h->type = __constant_cpu_to_be16(ETH_P_AOE);
        h->verfl = AOE_HVER;
        h->major = cpu_to_be16(d->aoemajor);
@@ -95,42 +94,155 @@ put_lba(struct aoe_atahdr *ah, sector_t lba)
 }
 
 static void
-aoecmd_ata_rw(struct aoedev *d, struct frame *f)
+ifrotate(struct aoetgt *t)
+{      /* advance t->ifp to the next usable interface slot of target t */
+       t->ifp++;
+       if (t->ifp >= &t->ifs[NAOEIFS] || t->ifp->nd == NULL)   /* past the array, or slot has no net device: wrap */
+               t->ifp = t->ifs;
+       if (t->ifp->nd == NULL) {       /* even the first slot has no net device */
+               printk(KERN_INFO "aoe: no interface to rotate to\n");
+               BUG();
+       }
+}
+/*
+ * skb_pool_put: append skb to the tail of the device's spare-skb
+ * queue, d->skbpool.  freeframe() uses it to park a frame's skb whose
+ * data is still referenced elsewhere after swapping in a replacement.
+ */
+static void
+skb_pool_put(struct aoedev *d, struct sk_buff *skb)
+{
+       __skb_queue_tail(&d->skbpool, skb);
+}
+/*
+ * skb_pool_get: obtain an skb that can be reused right away.
+ * Only the skb at the head of d->skbpool is examined: if its data
+ * reference count is exactly 1 it is unlinked and returned.  Otherwise
+ * a fresh ETH_ZLEN skb is allocated, but only while the pool holds
+ * fewer than NSKBPOOLMAX entries.  Returns NULL when neither works.
+ */
+static struct sk_buff *
+skb_pool_get(struct aoedev *d)
 {
+       struct sk_buff *skb = skb_peek(&d->skbpool);
+
+       if (skb && atomic_read(&skb_shinfo(skb)->dataref) == 1) {       /* head skb's data has no other holder */
+               __skb_unlink(skb, &d->skbpool);
+               return skb;
+       }
+       if (skb_queue_len(&d->skbpool) < NSKBPOOLMAX && /* pool is empty or its head is busy: allocate while under the cap */
+           (skb = new_skb(ETH_ZLEN)))
+               return skb;
+
+       return NULL;
+}
+
+/* freeframe is where we do our load balancing so it's a little hairy.
+ *
+ * Walks the device's targets round-robin, starting with the one after
+ * d->tgt, looking for a frame tagged FREETAG whose skb data is not
+ * referenced by anyone else.  On success the skb is emptied, d->tgt is
+ * set to the chosen target slot, that target's interface is rotated
+ * with ifrotate(), and the frame is returned.
+ * Returns NULL when d has no targets or no target can supply a frame;
+ * DEVFL_KICKME may be set in that case (see below).
+ */
+static struct frame *
+freeframe(struct aoedev *d)
+{
+       struct frame *f, *e, *rf;
+       struct aoetgt **t;
+       struct sk_buff *skb;
+
+       if (d->targets[0] == NULL) {    /* shouldn't happen, but I'm paranoid */
+               printk(KERN_ERR "aoe: NULL TARGETS!\n");
+               return NULL;
+       }
+       t = d->tgt;     /* start with the slot after the one chosen last time */
+       t++;
+       if (t >= &d->targets[NTARGETS] || !*t)  /* past the array or past the used slots: wrap */
+               t = d->targets;
+       for (;;) {
+               if ((*t)->nout < (*t)->maxout   /* fewer frames outstanding than the target's limit */
+               && t != d->htgt                 /* skip the target recorded in d->htgt */
+               && (*t)->ifp->nd) {             /* current interface slot has a net device */
+                       rf = NULL;      /* first free frame whose skb is still shared */
+                       f = (*t)->frames;
+                       e = f + (*t)->nframes;
+                       for (; f < e; f++) {
+                               if (f->tag != FREETAG)
+                                       continue;
+                               skb = f->skb;
+                               if (!skb        /* frame has no skb yet: allocate one now */
+                               && !(f->skb = skb = new_skb(ETH_ZLEN)))
+                                       continue;       /* allocation failed; try the next frame */
+                               if (atomic_read(&skb_shinfo(skb)->dataref)
+                                       != 1) {         /* skb data still referenced elsewhere */
+                                       if (!rf)
+                                               rf = f;
+                                       continue;
+                               }
+gotone:                                skb_shinfo(skb)->nr_frags = skb->data_len = 0;
+                               skb_trim(skb, 0);       /* skb now holds no data and no page frags */
+                               d->tgt = t;             /* remember which target the frame belongs to */
+                               ifrotate(*t);
+                               return f;
+                       }
+                       /* Work can be done, but the network layer is
+                          holding our precious packets.  Try to grab
+                          one from the pool. */
+                       f = rf;
+                       if (f == NULL) {        /* more paranoia */
+                               printk(KERN_ERR
+                                       "aoe: freeframe: %s.\n",
+                                       "unexpected null rf");
+                               d->flags |= DEVFL_KICKME;
+                               return NULL;
+                       }
+                       skb = skb_pool_get(d);
+                       if (skb) {
+                               skb_pool_put(d, f->skb);        /* park the busy skb in the pool */
+                               f->skb = skb;
+                               goto gotone;    /* jumps back into the loop body with f == rf and the replacement skb */
+                       }
+                       (*t)->dataref++;        /* counts how often this target had free frames but no usable skb */
+                       if ((*t)->nout == 0)    /* nothing outstanding on this target: ask rexmit_timer() to call aoecmd_work() */
+                               d->flags |= DEVFL_KICKME;
+               }
+               if (t == d->tgt)        /* we've looped and found nada */
+                       break;
+               t++;
+               if (t >= &d->targets[NTARGETS] || !*t)
+                       t = d->targets;
+       }
+       return NULL;
+}
+
+static int
+aoecmd_ata_rw(struct aoedev *d)
+{
+       struct frame *f;
        struct aoe_hdr *h;
        struct aoe_atahdr *ah;
        struct buf *buf;
+       struct bio_vec *bv;
+       struct aoetgt *t;
        struct sk_buff *skb;
        ulong bcnt;
-       register sector_t sector;
        char writebit, extbit;
 
        writebit = 0x10;
        extbit = 0x4;
 
+       f = freeframe(d);
+       if (f == NULL)
+               return 0;
+       t = *d->tgt;
        buf = d->inprocess;
-
-       sector = buf->sector;
-       bcnt = buf->bv_resid;
-       if (bcnt > d->maxbcnt)
-               bcnt = d->maxbcnt;
-
+       bv = buf->bv;
+       bcnt = t->ifp->maxbcnt;
+       if (bcnt == 0)
+               bcnt = DEFAULTBCNT;
+       if (bcnt > buf->bv_resid)
+               bcnt = buf->bv_resid;
        /* initialize the headers & frame */
        skb = f->skb;
-       h = (struct aoe_hdr *) skb->mac.raw;
+       h = (struct aoe_hdr *) skb_mac_header(skb);
        ah = (struct aoe_atahdr *) (h+1);
-       skb->len = sizeof *h + sizeof *ah;
+       skb_put(skb, sizeof *h + sizeof *ah);
        memset(h, 0, skb->len);
-       f->tag = aoehdr_atainit(d, h);
+       f->tag = aoehdr_atainit(d, t, h);
+       t->nout++;
        f->waited = 0;
        f->buf = buf;
-       f->bufaddr = buf->bufaddr;
+       f->bufaddr = page_address(bv->bv_page) + buf->bv_off;
        f->bcnt = bcnt;
-       f->lba = sector;
+       f->lba = buf->sector;
 
        /* set up ata header */
        ah->scnt = bcnt >> 9;
-       put_lba(ah, sector);
+       put_lba(ah, buf->sector);
        if (d->flags & DEVFL_EXT) {
                ah->aflags |= AOEAFL_EXT;
        } else {
@@ -138,72 +250,67 @@ aoecmd_ata_rw(struct aoedev *d, struct frame *f)
                ah->lba3 &= 0x0f;
                ah->lba3 |= 0xe0;       /* LBA bit + obsolete 0xa0 */
        }
-
        if (bio_data_dir(buf->bio) == WRITE) {
-               skb_fill_page_desc(skb, 0, virt_to_page(f->bufaddr),
-                       offset_in_page(f->bufaddr), bcnt);
+               skb_fill_page_desc(skb, 0, bv->bv_page, buf->bv_off, bcnt);
                ah->aflags |= AOEAFL_WRITE;
+               skb->len += bcnt;
+               skb->data_len = bcnt;
+               t->wpkts++;
        } else {
-               skb_shinfo(skb)->nr_frags = 0;
-               skb->len = ETH_ZLEN;
+               t->rpkts++;
                writebit = 0;
        }
 
-       ah->cmdstat = WIN_READ | writebit | extbit;
+       ah->cmdstat = ATA_CMD_PIO_READ | writebit | extbit;
 
        /* mark all tracking fields and load out */
        buf->nframesout += 1;
-       buf->bufaddr += bcnt;
+       buf->bv_off += bcnt;
        buf->bv_resid -= bcnt;
-/* dprintk("bv_resid=%ld\n", buf->bv_resid); */
        buf->resid -= bcnt;
        buf->sector += bcnt >> 9;
        if (buf->resid == 0) {
                d->inprocess = NULL;
        } else if (buf->bv_resid == 0) {
-               buf->bv++;
-               buf->bv_resid = buf->bv->bv_len;
-               buf->bufaddr = page_address(buf->bv->bv_page) + buf->bv->bv_offset;
-       }
-
-       skb->dev = d->ifp;
-       skb_get(skb);
-       skb->next = NULL;
-       if (d->sendq_hd)
-               d->sendq_tl->next = skb;
-       else
-               d->sendq_hd = skb;
-       d->sendq_tl = skb;
+               buf->bv = ++bv;
+               buf->bv_resid = bv->bv_len;
+               WARN_ON(buf->bv_resid == 0);
+               buf->bv_off = bv->bv_offset;
+       }
+
+       skb->dev = t->ifp->nd;
+       skb = skb_clone(skb, GFP_ATOMIC);
+       if (skb)
+               __skb_queue_tail(&d->sendq, skb);
+       return 1;
 }
 
 /* some callers cannot sleep, and they can call this function,
  * transmitting the packets later, when interrupts are on
  */
-static struct sk_buff *
-aoecmd_cfg_pkts(ushort aoemajor, unsigned char aoeminor, struct sk_buff **tail)
+static void
+aoecmd_cfg_pkts(ushort aoemajor, unsigned char aoeminor, struct sk_buff_head *queue)
 {
        struct aoe_hdr *h;
        struct aoe_cfghdr *ch;
-       struct sk_buff *skb, *sl, *sl_tail;
+       struct sk_buff *skb;
        struct net_device *ifp;
 
-       sl = sl_tail = NULL;
-
        read_lock(&dev_base_lock);
-       for (ifp = dev_base; ifp; dev_put(ifp), ifp = ifp->next) {
+       for_each_netdev(&init_net, ifp) {
                dev_hold(ifp);
                if (!is_aoe_netif(ifp))
-                       continue;
+                       goto cont;
 
                skb = new_skb(sizeof *h + sizeof *ch);
                if (skb == NULL) {
-                       iprintk("skb alloc failure\n");
-                       continue;
+                       printk(KERN_INFO "aoe: skb alloc failure\n");
+                       goto cont;
                }
+               skb_put(skb, sizeof *h + sizeof *ch);
                skb->dev = ifp;
-               if (sl_tail == NULL)
-                       sl_tail = skb;
-               h = (struct aoe_hdr *) skb->mac.raw;
+               __skb_queue_tail(queue, skb);
+               h = (struct aoe_hdr *) skb_mac_header(skb);
                memset(h, 0, sizeof *h + sizeof *ch);
 
                memset(h->dst, 0xff, sizeof h->dst);
@@ -214,48 +321,14 @@ aoecmd_cfg_pkts(ushort aoemajor, unsigned char aoeminor, struct sk_buff **tail)
                h->minor = aoeminor;
                h->cmd = AOECMD_CFG;
 
-               skb->next = sl;
-               sl = skb;
+cont:
+               dev_put(ifp);
        }
        read_unlock(&dev_base_lock);
-
-       if (tail != NULL)
-               *tail = sl_tail;
-       return sl;
-}
-
-/* enters with d->lock held */
-void
-aoecmd_work(struct aoedev *d)
-{
-       struct frame *f;
-       struct buf *buf;
-
-       if (d->flags & DEVFL_PAUSE) {
-               if (!aoedev_isbusy(d))
-                       d->sendq_hd = aoecmd_cfg_pkts(d->aoemajor,
-                                               d->aoeminor, &d->sendq_tl);
-               return;
-       }
-
-loop:
-       f = getframe(d, FREETAG);
-       if (f == NULL)
-               return;
-       if (d->inprocess == NULL) {
-               if (list_empty(&d->bufq))
-                       return;
-               buf = container_of(d->bufq.next, struct buf, bufs);
-               list_del(d->bufq.next);
-/*dprintk("bi_size=%ld\n", buf->bio->bi_size); */
-               d->inprocess = buf;
-       }
-       aoecmd_ata_rw(d, f);
-       goto loop;
 }
 
 static void
-rexmit(struct aoedev *d, struct frame *f)
+resend(struct aoedev *d, struct aoetgt *t, struct frame *f)
 {
        struct sk_buff *skb;
        struct aoe_hdr *h;
@@ -263,44 +336,48 @@ rexmit(struct aoedev *d, struct frame *f)
        char buf[128];
        u32 n;
 
-       n = newtag(d);
+       ifrotate(t);
+       n = newtag(t);
+       skb = f->skb;
+       h = (struct aoe_hdr *) skb_mac_header(skb);
+       ah = (struct aoe_atahdr *) (h+1);
 
        snprintf(buf, sizeof buf,
-               "%15s e%ld.%ld oldtag=%08x@%08lx newtag=%08x\n",
-               "retransmit",
-               d->aoemajor, d->aoeminor, f->tag, jiffies, n);
+               "%15s e%ld.%d oldtag=%08x@%08lx newtag=%08x s=%pm d=%pm nout=%d\n",
+               "retransmit", d->aoemajor, d->aoeminor, f->tag, jiffies, n,
+               h->src, h->dst, t->nout);
        aoechr_error(buf);
 
-       skb = f->skb;
-       h = (struct aoe_hdr *) skb->mac.raw;
-       ah = (struct aoe_atahdr *) (h+1);
        f->tag = n;
        h->tag = cpu_to_be32(n);
-       memcpy(h->dst, d->addr, sizeof h->dst);
-       memcpy(h->src, d->ifp->dev_addr, sizeof h->src);
-
-       n = DEFAULTBCNT / 512;
-       if (ah->scnt > n) {
-               ah->scnt = n;
-               if (ah->aflags & AOEAFL_WRITE)
+       memcpy(h->dst, t->addr, sizeof h->dst);
+       memcpy(h->src, t->ifp->nd->dev_addr, sizeof h->src);
+
+       switch (ah->cmdstat) {
+       default:
+               break;
+       case ATA_CMD_PIO_READ:
+       case ATA_CMD_PIO_READ_EXT:
+       case ATA_CMD_PIO_WRITE:
+       case ATA_CMD_PIO_WRITE_EXT:
+               put_lba(ah, f->lba);
+
+               n = f->bcnt;
+               if (n > DEFAULTBCNT)
+                       n = DEFAULTBCNT;
+               ah->scnt = n >> 9;
+               if (ah->aflags & AOEAFL_WRITE) {
                        skb_fill_page_desc(skb, 0, virt_to_page(f->bufaddr),
-                               offset_in_page(f->bufaddr), DEFAULTBCNT);
-               if (++d->lostjumbo > (d->nframes << 1))
-               if (d->maxbcnt != DEFAULTBCNT) {
-                       iprintk("too many lost jumbo - using 1KB frames.\n");
-                       d->maxbcnt = DEFAULTBCNT;
-                       d->flags |= DEVFL_MAXBCNT;
+                               offset_in_page(f->bufaddr), n);
+                       skb->len = sizeof *h + sizeof *ah + n;
+                       skb->data_len = n;
                }
        }
-
-       skb->dev = d->ifp;
-       skb_get(skb);
-       skb->next = NULL;
-       if (d->sendq_hd)
-               d->sendq_tl->next = skb;
-       else
-               d->sendq_hd = skb;
-       d->sendq_tl = skb;
+       skb->dev = t->ifp->nd;
+       skb = skb_clone(skb, GFP_ATOMIC);
+       if (skb == NULL)
+               return;
+       __skb_queue_tail(&d->sendq, skb);
 }
 
 static int
@@ -315,17 +392,98 @@ tsince(int tag)
        return n;
 }
 
+static struct aoeif *
+getif(struct aoetgt *t, struct net_device *nd)
+{      /* return the slot of t->ifs[] whose nd equals the given nd (a NULL nd finds the first empty slot); NULL if none */
+       struct aoeif *p, *e;
+
+       p = t->ifs;
+       e = p + NAOEIFS;        /* one past the last slot */
+       for (; p < e; p++)
+               if (p->nd == nd)
+                       return p;
+       return NULL;
+}
+/*
+ * addif: bind net device nd to the first empty interface slot of
+ * target t and reset that slot's bookkeeping.  Returns the slot, or
+ * NULL when all NAOEIFS slots are already in use.
+ */
+static struct aoeif *
+addif(struct aoetgt *t, struct net_device *nd)
+{
+       struct aoeif *p;
+
+       p = getif(t, NULL);     /* a slot with nd == NULL is an empty one */
+       if (!p)
+               return NULL;
+       p->nd = nd;
+       p->maxbcnt = DEFAULTBCNT;
+       p->lost = 0;
+       p->lostjumbo = 0;
+       return p;
+}
+/*
+ * ejectif: remove interface slot ifp from target t by sliding every
+ * later slot of t->ifs[] down by one and clearing the net device of
+ * the last slot.
+ */
+static void
+ejectif(struct aoetgt *t, struct aoeif *ifp)
+{
+       struct aoeif *e;
+       ulong n;
+
+       e = t->ifs + NAOEIFS - 1;       /* last slot of the array */
+       n = (e - ifp) * sizeof *ifp;    /* bytes occupied by the slots after ifp */
+       memmove(ifp, ifp+1, n);
+       e->nd = NULL;
+}
+/*
+ * sthtith: move every in-use frame of the target recorded in d->htgt
+ * onto frames obtained from freeframe() (which never picks the d->htgt
+ * target) and resend each one through the target freeframe() chose.
+ * Returns 0 as soon as freeframe() has no frame to offer, leaving
+ * d->htgt set.  Returns 1 once all frames have been moved; the old
+ * target's interface table is then zeroed and d->htgt cleared.
+ */
+static int
+sthtith(struct aoedev *d)
+{
+       struct frame *f, *e, *nf;
+       struct sk_buff *skb;
+       struct aoetgt *ht = *d->htgt;
+
+       f = ht->frames;
+       e = f + ht->nframes;
+       for (; f < e; f++) {
+               if (f->tag == FREETAG)  /* frame not in use: nothing to move */
+                       continue;
+               nf = freeframe(d);      /* also sets d->tgt to the target owning nf */
+               if (!nf)
+                       return 0;
+               skb = nf->skb;
+               *nf = *f;               /* nf takes over f's state, including f's skb */
+               f->skb = skb;           /* f keeps nf's former skb, so both frames still own one */
+               f->tag = FREETAG;
+               nf->waited = 0;
+               ht->nout--;             /* move the outstanding count to the new target */
+               (*d->tgt)->nout++;
+               resend(d, *d->tgt, nf);
+       }
+       /* he's clean, he's useless.  take away his interfaces */
+       memset(ht->ifs, 0, sizeof ht->ifs);
+       d->htgt = NULL;
+       return 1;
+}
+/*
+ * ata_scnt: packet points at an AoE header immediately followed by an
+ * AoE ATA header; return that ATA header's scnt field.
+ */
+static inline unsigned char
+ata_scnt(unsigned char *packet) {
+       struct aoe_hdr *h;
+       struct aoe_atahdr *ah;
+
+       h = (struct aoe_hdr *) packet;
+       ah = (struct aoe_atahdr *) (h+1);       /* ATA header starts right after the AoE header */
+       return ah->scnt;
+}
+
 static void
 rexmit_timer(ulong vp)
 {
+       struct sk_buff_head queue;
        struct aoedev *d;
+       struct aoetgt *t, **tt, **te;
+       struct aoeif *ifp;
        struct frame *f, *e;
-       struct sk_buff *sl;
        register long timeout;
        ulong flags, n;
 
        d = (struct aoedev *) vp;
-       sl = NULL;
 
        /* timeout is always ~150% of the moving average */
        timeout = d->rttavg;
@@ -337,42 +495,113 @@ rexmit_timer(ulong vp)
                spin_unlock_irqrestore(&d->lock, flags);
                return;
        }
-       f = d->frames;
-       e = f + d->nframes;
-       for (; f<e; f++) {
-               if (f->tag != FREETAG && tsince(f->tag) >= timeout) {
+       tt = d->targets;
+       te = tt + NTARGETS;
+       for (; tt < te && *tt; tt++) {
+               t = *tt;
+               f = t->frames;
+               e = f + t->nframes;
+               for (; f < e; f++) {
+                       if (f->tag == FREETAG
+                       || tsince(f->tag) < timeout)
+                               continue;
                        n = f->waited += timeout;
                        n /= HZ;
-                       if (n > MAXWAIT) { /* waited too long.  device failure. */
+                       if (n > aoe_deadsecs) {
+                               /* waited too long.  device failure. */
                                aoedev_downdev(d);
                                break;
                        }
-                       rexmit(d, f);
+
+                       if (n > HELPWAIT /* see if another target can help */
+                       && (tt != d->targets || d->targets[1]))
+                               d->htgt = tt;
+
+                       if (t->nout == t->maxout) {
+                               if (t->maxout > 1)
+                                       t->maxout--;
+                               t->lastwadj = jiffies;
+                       }
+
+                       ifp = getif(t, f->skb->dev);
+                       if (ifp && ++ifp->lost > (t->nframes << 1)
+                       && (ifp != t->ifs || t->ifs[1].nd)) {
+                               ejectif(t, ifp);
+                               ifp = NULL;
+                       }
+
+                       if (ata_scnt(skb_mac_header(f->skb)) > DEFAULTBCNT / 512
+                       && ifp && ++ifp->lostjumbo > (t->nframes << 1)
+                       && ifp->maxbcnt != DEFAULTBCNT) {
+                               printk(KERN_INFO
+                                       "aoe: e%ld.%d: "
+                                       "too many lost jumbo on "
+                                       "%s:%pm - "
+                                       "falling back to %d frames.\n",
+                                       d->aoemajor, d->aoeminor,
+                                       ifp->nd->name, t->addr,
+                                       DEFAULTBCNT);
+                               ifp->maxbcnt = 0;
+                       }
+                       resend(d, t, f);
+               }
+
+               /* window check */
+               if (t->nout == t->maxout
+               && t->maxout < t->nframes
+               && (jiffies - t->lastwadj)/HZ > 10) {
+                       t->maxout++;
+                       t->lastwadj = jiffies;
                }
        }
 
-       sl = d->sendq_hd;
-       d->sendq_hd = d->sendq_tl = NULL;
-       if (sl) {
+       if (!skb_queue_empty(&d->sendq)) {
                n = d->rttavg <<= 1;
                if (n > MAXTIMER)
                        d->rttavg = MAXTIMER;
        }
 
+       if (d->flags & DEVFL_KICKME || d->htgt) {
+               d->flags &= ~DEVFL_KICKME;
+               aoecmd_work(d);
+       }
+
+       __skb_queue_head_init(&queue);
+       skb_queue_splice_init(&d->sendq, &queue);
+
        d->timer.expires = jiffies + TIMERTICK;
        add_timer(&d->timer);
 
        spin_unlock_irqrestore(&d->lock, flags);
 
-       aoenet_xmit(sl);
+       aoenet_xmit(&queue);
+}
+
+/* enters with d->lock held
+ *
+ * Keeps issuing read/write frames for the device.  Each pass first
+ * lets sthtith() finish if d->htgt is set, then makes sure a buf is in
+ * process (taking the next one from d->bufq), and calls aoecmd_ata_rw().
+ * Stops when sthtith() returns 0, when there is no buf left to work
+ * on, or when aoecmd_ata_rw() returns 0.
+ */
+void
+aoecmd_work(struct aoedev *d)
+{
+       struct buf *buf;
+loop:
+       if (d->htgt && !sthtith(d))
+               return;
+       if (d->inprocess == NULL) {
+               if (list_empty(&d->bufq))
+                       return;
+               buf = container_of(d->bufq.next, struct buf, bufs);     /* oldest queued buf */
+               list_del(d->bufq.next);
+               d->inprocess = buf;
+       }
+       if (aoecmd_ata_rw(d))   /* 1: a frame was set up, so try for another */
+               goto loop;
 }
 
 /* this function performs work that has been deferred until sleeping is OK
  */
 void
-aoecmd_sleepwork(void *vp)
+aoecmd_sleepwork(struct work_struct *work)
 {
-       struct aoedev *d = (struct aoedev *) vp;
+       struct aoedev *d = container_of(work, struct aoedev, work);
 
        if (d->flags & DEVFL_GDALLOC)
                aoeblk_gdalloc(d);
@@ -382,7 +611,7 @@ aoecmd_sleepwork(void *vp)
                unsigned long flags;
                u64 ssize;
 
-               ssize = d->gd->capacity;
+               ssize = get_capacity(d->gd);
                bd = bdget_disk(d->gd, 0);
 
                if (bd) {
@@ -399,22 +628,22 @@ aoecmd_sleepwork(void *vp)
 }
 
 static void
-ataid_complete(struct aoedev *d, unsigned char *id)
+ataid_complete(struct aoedev *d, struct aoetgt *t, unsigned char *id)
 {
        u64 ssize;
        u16 n;
 
        /* word 83: command set supported */
-       n = le16_to_cpu(get_unaligned((__le16 *) &id[83<<1]));
+       n = get_unaligned_le16(&id[83 << 1]);
 
        /* word 86: command set/feature enabled */
-       n |= le16_to_cpu(get_unaligned((__le16 *) &id[86<<1]));
+       n |= get_unaligned_le16(&id[86 << 1]);
 
        if (n & (1<<10)) {      /* bit 10: LBA 48 */
                d->flags |= DEVFL_EXT;
 
                /* word 100: number lba48 sectors */
-               ssize = le64_to_cpu(get_unaligned((__le64 *) &id[100<<1]));
+               ssize = get_unaligned_le64(&id[100 << 1]);
 
                /* set as in ide-disk.c:init_idedisk_capacity */
                d->geo.cylinders = ssize;
@@ -425,33 +654,29 @@ ataid_complete(struct aoedev *d, unsigned char *id)
                d->flags &= ~DEVFL_EXT;
 
                /* number lba28 sectors */
-               ssize = le32_to_cpu(get_unaligned((__le32 *) &id[60<<1]));
+               ssize = get_unaligned_le32(&id[60 << 1]);
 
                /* NOTE: obsolete in ATA 6 */
-               d->geo.cylinders = le16_to_cpu(get_unaligned((__le16 *) &id[54<<1]));
-               d->geo.heads = le16_to_cpu(get_unaligned((__le16 *) &id[55<<1]));
-               d->geo.sectors = le16_to_cpu(get_unaligned((__le16 *) &id[56<<1]));
+               d->geo.cylinders = get_unaligned_le16(&id[54 << 1]);
+               d->geo.heads = get_unaligned_le16(&id[55 << 1]);
+               d->geo.sectors = get_unaligned_le16(&id[56 << 1]);
        }
 
        if (d->ssize != ssize)
-               iprintk("%012llx e%lu.%lu v%04x has %llu sectors\n",
-                       (unsigned long long)mac_addr(d->addr),
+               printk(KERN_INFO
+                       "aoe: %pm e%ld.%d v%04x has %llu sectors\n",
+                       t->addr,
                        d->aoemajor, d->aoeminor,
                        d->fw_ver, (long long)ssize);
        d->ssize = ssize;
        d->geo.start = 0;
+       if (d->flags & (DEVFL_GDALLOC|DEVFL_NEWSIZE))
+               return;
        if (d->gd != NULL) {
-               d->gd->capacity = ssize;
+               set_capacity(d->gd, ssize);
                d->flags |= DEVFL_NEWSIZE;
-       } else {
-               if (d->flags & DEVFL_GDALLOC) {
-                       eprintk("can't schedule work for e%lu.%lu, %s\n",
-                              d->aoemajor, d->aoeminor,
-                              "it's already on!  This shouldn't happen.\n");
-                       return;
-               }
+       } else
                d->flags |= DEVFL_GDALLOC;
-       }
        schedule_work(&d->work);
 }
 
@@ -461,8 +686,15 @@ calc_rttavg(struct aoedev *d, int rtt)
        register long n;
 
        n = rtt;
-       if (n < MINTIMER)
-               n = MINTIMER;
+       if (n < 0) {
+               n = -rtt;
+               if (n < MINTIMER)
+                       n = MINTIMER;
+               else if (n > MAXTIMER)
+                       n = MAXTIMER;
+               d->mintimer += (n - d->mintimer) >> 1;
+       } else if (n < d->mintimer)
+               n = d->mintimer;
        else if (n > MAXTIMER)
                n = MAXTIMER;
 
@@ -471,22 +703,56 @@ calc_rttavg(struct aoedev *d, int rtt)
        d->rttavg += n >> 2;
 }
 
+static struct aoetgt *
+gettgt(struct aoedev *d, char *addr)
+{
+       struct aoetgt **t, **e;
+
+       t = d->targets;
+       e = t + NTARGETS;
+       for (; t < e && *t; t++)
+               if (memcmp((*t)->addr, addr, sizeof((*t)->addr)) == 0)
+                       return *t;
+       return NULL;
+}
+/*
+ * diskstats: account one bio in the statistics of the partition of
+ * disk that contains sector.  Counters are indexed by the bio's data
+ * direction; duration is added to both ticks[rw] and io_ticks.
+ */
+static inline void
+diskstats(struct gendisk *disk, struct bio *bio, ulong duration, sector_t sector)
+{
+       unsigned long n_sect = bio->bi_size >> 9;       /* bi_size divided by 512 */
+       const int rw = bio_data_dir(bio);
+       struct hd_struct *part;
+       int cpu;
+
+       cpu = part_stat_lock();
+       part = disk_map_sector_rcu(disk, sector);
+
+       part_stat_inc(cpu, part, ios[rw]);
+       part_stat_add(cpu, part, ticks[rw], duration);
+       part_stat_add(cpu, part, sectors[rw], n_sect);
+       part_stat_add(cpu, part, io_ticks, duration);
+
+       part_stat_unlock();
+}
+
 void
 aoecmd_ata_rsp(struct sk_buff *skb)
 {
+       struct sk_buff_head queue;
        struct aoedev *d;
-       struct aoe_hdr *hin;
+       struct aoe_hdr *hin, *hout;
        struct aoe_atahdr *ahin, *ahout;
        struct frame *f;
        struct buf *buf;
-       struct sk_buff *sl;
+       struct aoetgt *t;
+       struct aoeif *ifp;
        register long n;
        ulong flags;
        char ebuf[128];
        u16 aoemajor;
 
-       hin = (struct aoe_hdr *) skb->mac.raw;
-       aoemajor = be16_to_cpu(hin->major);
+       hin = (struct aoe_hdr *) skb_mac_header(skb);
+       aoemajor = get_unaligned_be16(&hin->major);
        d = aoedev_by_aoeaddr(aoemajor, hin->minor);
        if (d == NULL) {
                snprintf(ebuf, sizeof ebuf, "aoecmd_ata_rsp: ata response "
@@ -498,15 +764,24 @@ aoecmd_ata_rsp(struct sk_buff *skb)
 
        spin_lock_irqsave(&d->lock, flags);
 
-       f = getframe(d, be32_to_cpu(hin->tag));
+       n = get_unaligned_be32(&hin->tag);
+       t = gettgt(d, hin->src);
+       if (t == NULL) {
+               printk(KERN_INFO "aoe: can't find target e%ld.%d:%pm\n",
+                       d->aoemajor, d->aoeminor, hin->src);
+               spin_unlock_irqrestore(&d->lock, flags);
+               return;
+       }
+       f = getframe(t, n);
        if (f == NULL) {
+               calc_rttavg(d, -tsince(n));
                spin_unlock_irqrestore(&d->lock, flags);
                snprintf(ebuf, sizeof ebuf,
                        "%15s e%d.%d    tag=%08x@%08lx\n",
                        "unexpected rsp",
-                       be16_to_cpu(hin->major),
+                       get_unaligned_be16(&hin->major),
                        hin->minor,
-                       be32_to_cpu(hin->tag),
+                       get_unaligned_be32(&hin->tag),
                        jiffies);
                aoechr_error(ebuf);
                return;
@@ -515,218 +790,293 @@ aoecmd_ata_rsp(struct sk_buff *skb)
        calc_rttavg(d, tsince(f->tag));
 
        ahin = (struct aoe_atahdr *) (hin+1);
-       ahout = (struct aoe_atahdr *) (f->skb->mac.raw + sizeof(struct aoe_hdr));
+       hout = (struct aoe_hdr *) skb_mac_header(f->skb);
+       ahout = (struct aoe_atahdr *) (hout+1);
        buf = f->buf;
 
-       if (ahout->cmdstat == WIN_IDENTIFY)
-               d->flags &= ~DEVFL_PAUSE;
        if (ahin->cmdstat & 0xa9) {     /* these bits cleared on success */
-               eprintk("ata error cmd=%2.2Xh stat=%2.2Xh from e%ld.%ld\n",
+               printk(KERN_ERR
+                       "aoe: ata error cmd=%2.2Xh stat=%2.2Xh from e%ld.%d\n",
                        ahout->cmdstat, ahin->cmdstat,
                        d->aoemajor, d->aoeminor);
                if (buf)
                        buf->flags |= BUFFL_FAIL;
        } else {
+               if (d->htgt && t == *d->htgt) /* I'll help myself, thank you. */
+                       d->htgt = NULL;
                n = ahout->scnt << 9;
                switch (ahout->cmdstat) {
-               case WIN_READ:
-               case WIN_READ_EXT:
+               case ATA_CMD_PIO_READ:
+               case ATA_CMD_PIO_READ_EXT:
                        if (skb->len - sizeof *hin - sizeof *ahin < n) {
-                               eprintk("runt data size in read.  skb->len=%d\n",
-                                       skb->len);
+                               printk(KERN_ERR
+                                       "aoe: %s.  skb->len=%d need=%ld\n",
+                                       "runt data size in read", skb->len, n);
                                /* fail frame f?  just returning will rexmit. */
                                spin_unlock_irqrestore(&d->lock, flags);
                                return;
                        }
                        memcpy(f->bufaddr, ahin+1, n);
-               case WIN_WRITE:
-               case WIN_WRITE_EXT:
+               case ATA_CMD_PIO_WRITE:
+               case ATA_CMD_PIO_WRITE_EXT:
+                       ifp = getif(t, skb->dev);
+                       if (ifp) {
+                               ifp->lost = 0;
+                               if (n > DEFAULTBCNT)
+                                       ifp->lostjumbo = 0;
+                       }
                        if (f->bcnt -= n) {
+                               f->lba += n >> 9;
                                f->bufaddr += n;
-                               put_lba(ahout, f->lba += ahout->scnt);
-                               n = f->bcnt;
-                               if (n > DEFAULTBCNT)
-                                       n = DEFAULTBCNT;
-                               ahout->scnt = n >> 9;
-                               if (ahout->aflags & AOEAFL_WRITE)
-                                       skb_fill_page_desc(f->skb, 0,
-                                               virt_to_page(f->bufaddr),
-                                               offset_in_page(f->bufaddr), n);
-                               skb_get(f->skb);
-                               f->skb->next = NULL;
-                               spin_unlock_irqrestore(&d->lock, flags);
-                               aoenet_xmit(f->skb);
-                               return;
+                               resend(d, t, f);
+                               goto xmit;
                        }
-                       if (n > DEFAULTBCNT)
-                               d->lostjumbo = 0;
                        break;
-               case WIN_IDENTIFY:
+               case ATA_CMD_ID_ATA:
                        if (skb->len - sizeof *hin - sizeof *ahin < 512) {
-                               iprintk("runt data size in ataid.  skb->len=%d\n",
+                               printk(KERN_INFO
+                                       "aoe: runt data size in ataid.  skb->len=%d\n",
                                        skb->len);
                                spin_unlock_irqrestore(&d->lock, flags);
                                return;
                        }
-                       ataid_complete(d, (char *) (ahin+1));
+                       ataid_complete(d, t, (char *) (ahin+1));
                        break;
                default:
-                       iprintk("unrecognized ata command %2.2Xh for %d.%d\n",
+                       printk(KERN_INFO
+                               "aoe: unrecognized ata command %2.2Xh for %d.%d\n",
                                ahout->cmdstat,
-                               be16_to_cpu(hin->major),
+                               get_unaligned_be16(&hin->major),
                                hin->minor);
                }
        }
 
-       if (buf) {
-               buf->nframesout -= 1;
-               if (buf->nframesout == 0 && buf->resid == 0) {
-                       unsigned long duration = jiffies - buf->start_time;
-                       unsigned long n_sect = buf->bio->bi_size >> 9;
-                       struct gendisk *disk = d->gd;
-                       const int rw = bio_data_dir(buf->bio);
-
-                       disk_stat_inc(disk, ios[rw]);
-                       disk_stat_add(disk, ticks[rw], duration);
-                       disk_stat_add(disk, sectors[rw], n_sect);
-                       disk_stat_add(disk, io_ticks, duration);
-                       n = (buf->flags & BUFFL_FAIL) ? -EIO : 0;
-                       bio_endio(buf->bio, buf->bio->bi_size, n);
-                       mempool_free(buf, d->bufpool);
+       if (buf && --buf->nframesout == 0 && buf->resid == 0) {
+               diskstats(d->gd, buf->bio, jiffies - buf->stime, buf->sector);
+               if (buf->flags & BUFFL_FAIL)
+                       bio_endio(buf->bio, -EIO);
+               else {
+                       bio_flush_dcache_pages(buf->bio);
+                       bio_endio(buf->bio, 0);
                }
+               mempool_free(buf, d->bufpool);
        }
 
        f->buf = NULL;
        f->tag = FREETAG;
+       t->nout--;
 
        aoecmd_work(d);
-       sl = d->sendq_hd;
-       d->sendq_hd = d->sendq_tl = NULL;
+xmit:
+       __skb_queue_head_init(&queue);
+       skb_queue_splice_init(&d->sendq, &queue);
 
        spin_unlock_irqrestore(&d->lock, flags);
-       aoenet_xmit(sl);
+       aoenet_xmit(&queue);
 }
 
 void
 aoecmd_cfg(ushort aoemajor, unsigned char aoeminor)
 {
-       struct sk_buff *sl;
-
-       sl = aoecmd_cfg_pkts(aoemajor, aoeminor, NULL);
+       struct sk_buff_head queue;
 
-       aoenet_xmit(sl);
+       __skb_queue_head_init(&queue);
+       aoecmd_cfg_pkts(aoemajor, aoeminor, &queue);
+       aoenet_xmit(&queue);
 }
  
-/*
- * Since we only call this in one place (and it only prepares one frame)
- * we just return the skb.  Usually we'd chain it up to the aoedev sendq.
- */
-static struct sk_buff *
+struct sk_buff *
 aoecmd_ata_id(struct aoedev *d)
 {
        struct aoe_hdr *h;
        struct aoe_atahdr *ah;
        struct frame *f;
        struct sk_buff *skb;
+       struct aoetgt *t;
 
-       f = getframe(d, FREETAG);
-       if (f == NULL) {
-               eprintk("can't get a frame. This shouldn't happen.\n");
+       f = freeframe(d);
+       if (f == NULL)
                return NULL;
-       }
+
+       t = *d->tgt;
 
        /* initialize the headers & frame */
        skb = f->skb;
-       h = (struct aoe_hdr *) skb->mac.raw;
+       h = (struct aoe_hdr *) skb_mac_header(skb);
        ah = (struct aoe_atahdr *) (h+1);
-       skb->len = sizeof *h + sizeof *ah;
+       skb_put(skb, sizeof *h + sizeof *ah);
        memset(h, 0, skb->len);
-       f->tag = aoehdr_atainit(d, h);
+       f->tag = aoehdr_atainit(d, t, h);
+       t->nout++;
        f->waited = 0;
 
        /* set up ata header */
        ah->scnt = 1;
-       ah->cmdstat = WIN_IDENTIFY;
+       ah->cmdstat = ATA_CMD_ID_ATA;
        ah->lba3 = 0xa0;
 
-       skb->dev = d->ifp;
-       skb_get(skb);
+       skb->dev = t->ifp->nd;
 
        d->rttavg = MAXTIMER;
        d->timer.function = rexmit_timer;
 
-       return skb;
+       return skb_clone(skb, GFP_ATOMIC);
 }
  
+static struct aoetgt *
+addtgt(struct aoedev *d, char *addr, ulong nframes)
+{
+       struct aoetgt *t, **tt, **te;
+       struct frame *f, *e;
+
+       tt = d->targets;
+       te = tt + NTARGETS;
+       for (; tt < te && *tt; tt++)
+               ;
+
+       if (tt == te) {
+               printk(KERN_INFO
+                       "aoe: device addtgt failure; too many targets\n");
+               return NULL;
+       }
+       t = kcalloc(1, sizeof *t, GFP_ATOMIC);
+       f = kcalloc(nframes, sizeof *f, GFP_ATOMIC);
+       if (!t || !f) {
+               kfree(f);
+               kfree(t);
+               printk(KERN_INFO "aoe: cannot allocate memory to add target\n");
+               return NULL;
+       }
+
+       t->nframes = nframes;
+       t->frames = f;
+       e = f + nframes;
+       for (; f < e; f++)
+               f->tag = FREETAG;
+       memcpy(t->addr, addr, sizeof t->addr);
+       t->ifp = t->ifs;
+       t->maxout = t->nframes;
+       return *tt = t;
+}
+
 void
 aoecmd_cfg_rsp(struct sk_buff *skb)
 {
        struct aoedev *d;
        struct aoe_hdr *h;
        struct aoe_cfghdr *ch;
+       struct aoetgt *t;
+       struct aoeif *ifp;
        ulong flags, sysminor, aoemajor;
        struct sk_buff *sl;
-       enum { MAXFRAMES = 16 };
        u16 n;
 
-       h = (struct aoe_hdr *) skb->mac.raw;
+       h = (struct aoe_hdr *) skb_mac_header(skb);
        ch = (struct aoe_cfghdr *) (h+1);
 
        /*
         * Enough people have their dip switches set backwards to
         * warrant a loud message for this special case.
         */
-       aoemajor = be16_to_cpu(h->major);
+       aoemajor = get_unaligned_be16(&h->major);
        if (aoemajor == 0xfff) {
-               eprintk("Warning: shelf address is all ones.  "
+               printk(KERN_ERR "aoe: Warning: shelf address is all ones.  "
                        "Check shelf dip switches.\n");
                return;
        }
 
        sysminor = SYSMINOR(aoemajor, h->minor);
        if (sysminor * AOE_PARTITIONS + AOE_PARTITIONS > MINORMASK) {
-               iprintk("e%ld.%d: minor number too large\n",
+               printk(KERN_INFO "aoe: e%ld.%d: minor number too large\n",
                        aoemajor, (int) h->minor);
                return;
        }
 
        n = be16_to_cpu(ch->bufcnt);
-       if (n > MAXFRAMES)      /* keep it reasonable */
-               n = MAXFRAMES;
+       if (n > aoe_maxout)     /* keep it reasonable */
+               n = aoe_maxout;
 
-       d = aoedev_by_sysminor_m(sysminor, n);
+       d = aoedev_by_sysminor_m(sysminor);
        if (d == NULL) {
-               iprintk("device sysminor_m failure\n");
+               printk(KERN_INFO "aoe: device sysminor_m failure\n");
                return;
        }
 
        spin_lock_irqsave(&d->lock, flags);
 
-       /* permit device to migrate mac and network interface */
-       d->ifp = skb->dev;
-       memcpy(d->addr, h->src, sizeof d->addr);
-       if (!(d->flags & DEVFL_MAXBCNT)) {
-               n = d->ifp->mtu;
+       t = gettgt(d, h->src);
+       if (!t) {
+               t = addtgt(d, h->src, n);
+               if (!t) {
+                       spin_unlock_irqrestore(&d->lock, flags);
+                       return;
+               }
+       }
+       ifp = getif(t, skb->dev);
+       if (!ifp) {
+               ifp = addif(t, skb->dev);
+               if (!ifp) {
+                       printk(KERN_INFO
+                               "aoe: device addif failure; "
+                               "too many interfaces?\n");
+                       spin_unlock_irqrestore(&d->lock, flags);
+                       return;
+               }
+       }
+       if (ifp->maxbcnt) {
+               n = ifp->nd->mtu;
                n -= sizeof (struct aoe_hdr) + sizeof (struct aoe_atahdr);
                n /= 512;
                if (n > ch->scnt)
                        n = ch->scnt;
-               d->maxbcnt = n ? n * 512 : DEFAULTBCNT;
+               n = n ? n * 512 : DEFAULTBCNT;
+               if (n != ifp->maxbcnt) {
+                       printk(KERN_INFO
+                               "aoe: e%ld.%d: setting %d%s%s:%pm\n",
+                               d->aoemajor, d->aoeminor, n,
+                               " byte data frames on ", ifp->nd->name,
+                               t->addr);
+                       ifp->maxbcnt = n;
+               }
        }
 
        /* don't change users' perspective */
-       if (d->nopen && !(d->flags & DEVFL_PAUSE)) {
+       if (d->nopen) {
                spin_unlock_irqrestore(&d->lock, flags);
                return;
        }
-       d->flags |= DEVFL_PAUSE;        /* force pause */
        d->fw_ver = be16_to_cpu(ch->fwver);
 
-       /* check for already outstanding ataid */
-       sl = aoedev_isbusy(d) == 0 ? aoecmd_ata_id(d) : NULL;
+       sl = aoecmd_ata_id(d);
 
        spin_unlock_irqrestore(&d->lock, flags);
 
-       aoenet_xmit(sl);
+       if (sl) {
+               struct sk_buff_head queue;
+               __skb_queue_head_init(&queue);
+               __skb_queue_tail(&queue, sl);
+               aoenet_xmit(&queue);
+       }
 }
 
+void
+aoecmd_cleanslate(struct aoedev *d)
+{
+       struct aoetgt **t, **te;
+       struct aoeif *p, *e;
+
+       d->mintimer = MINTIMER;
+
+       t = d->targets;
+       te = t + NTARGETS;
+       for (; t < te && *t; t++) {
+               (*t)->maxout = (*t)->nframes;
+               p = (*t)->ifs;
+               e = p + NAOEIFS;
+               for (; p < e; p++) {
+                       p->lostjumbo = 0;
+                       p->lost = 0;
+                       p->maxbcnt = DEFAULTBCNT;
+               }
+       }
+}