1 /* Copyright (c) 2006 Coraid, Inc. See COPYING for GPL terms. */
4 * Filesystem request handling methods
7 #include <linux/hdreg.h>
8 #include <linux/blkdev.h>
9 #include <linux/skbuff.h>
10 #include <linux/netdevice.h>
11 #include <linux/genhd.h>
12 #include <asm/unaligned.h>
15 #define TIMERTICK (HZ / 10) /* re-arm interval of the rexmit timer: one tenth of HZ jiffies */
16 #define MINTIMER (2 * TIMERTICK) /* two timer ticks; value d->mintimer is reset to in aoecmd_cfg_rsp */
17 #define MAXTIMER (HZ << 1) /* 2 * HZ jiffies; bound compared against in calc_rttavg, and reset value for d->rttavg */
18 #define MAXWAIT (60 * 3) /* After MAXWAIT seconds, give up and fail dev */
25 skb = alloc_skb(len, GFP_ATOMIC);
27 skb->nh.raw = skb->mac.raw = skb->data;
28 skb->protocol = __constant_htons(ETH_P_AOE);
31 memset(skb->head, 0, len);
32 skb->next = skb->prev = NULL;
34 /* tell the network layer not to perform IP checksums
35 * or to get the NIC to do it
37 skb->ip_summed = CHECKSUM_NONE;
43 getframe(struct aoedev *d, int tag)
56 * Leave the top bit clear so we have tagspace for userland.
57 * The bottom 16 bits are the xmit tick for rexmit/rttavg processing.
58 * This driver reserves tag -1 to mean "unused frame."
61 newtag(struct aoedev *d)
66 return n |= (++d->lasttag & 0x7fff) << 16;
70 aoehdr_atainit(struct aoedev *d, struct aoe_hdr *h)
72 u32 host_tag = newtag(d);
74 memcpy(h->src, d->ifp->dev_addr, sizeof h->src);
75 memcpy(h->dst, d->addr, sizeof h->dst);
76 h->type = __constant_cpu_to_be16(ETH_P_AOE);
78 h->major = cpu_to_be16(d->aoemajor);
79 h->minor = d->aoeminor;
81 h->tag = cpu_to_be32(host_tag);
87 put_lba(struct aoe_atahdr *ah, sector_t lba)
98 aoecmd_ata_rw(struct aoedev *d, struct frame *f)
101 struct aoe_atahdr *ah;
105 register sector_t sector;
106 char writebit, extbit;
113 sector = buf->sector;
114 bcnt = buf->bv_resid;
115 if (bcnt > d->maxbcnt)
118 /* initialize the headers & frame */
120 h = (struct aoe_hdr *) skb->mac.raw;
121 ah = (struct aoe_atahdr *) (h+1);
122 skb->len = sizeof *h + sizeof *ah;
123 memset(h, 0, ETH_ZLEN);
124 f->tag = aoehdr_atainit(d, h);
127 f->bufaddr = buf->bufaddr;
131 /* set up ata header */
132 ah->scnt = bcnt >> 9;
134 if (d->flags & DEVFL_EXT) {
135 ah->aflags |= AOEAFL_EXT;
139 ah->lba3 |= 0xe0; /* LBA bit + obsolete 0xa0 */
142 if (bio_data_dir(buf->bio) == WRITE) {
143 skb_fill_page_desc(skb, 0, virt_to_page(f->bufaddr),
144 offset_in_page(f->bufaddr), bcnt);
145 ah->aflags |= AOEAFL_WRITE;
147 skb->data_len = bcnt;
153 ah->cmdstat = WIN_READ | writebit | extbit;
155 /* mark all tracking fields and load out */
156 buf->nframesout += 1;
157 buf->bufaddr += bcnt;
158 buf->bv_resid -= bcnt;
159 /* dprintk("bv_resid=%ld\n", buf->bv_resid); */
161 buf->sector += bcnt >> 9;
162 if (buf->resid == 0) {
164 } else if (buf->bv_resid == 0) {
166 buf->bv_resid = buf->bv->bv_len;
167 buf->bufaddr = page_address(buf->bv->bv_page) + buf->bv->bv_offset;
171 skb = skb_clone(skb, GFP_ATOMIC);
175 d->sendq_tl->next = skb;
181 /* some callers cannot sleep, and they can call this function,
182 * transmitting the packets later, when interrupts are on
184 static struct sk_buff *
185 aoecmd_cfg_pkts(ushort aoemajor, unsigned char aoeminor, struct sk_buff **tail)
188 struct aoe_cfghdr *ch;
189 struct sk_buff *skb, *sl, *sl_tail;
190 struct net_device *ifp;
194 read_lock(&dev_base_lock);
195 for (ifp = dev_base; ifp; dev_put(ifp), ifp = ifp->next) {
197 if (!is_aoe_netif(ifp))
200 skb = new_skb(sizeof *h + sizeof *ch);
202 iprintk("skb alloc failure\n");
208 h = (struct aoe_hdr *) skb->mac.raw;
209 memset(h, 0, sizeof *h + sizeof *ch);
211 memset(h->dst, 0xff, sizeof h->dst);
212 memcpy(h->src, ifp->dev_addr, sizeof h->src);
213 h->type = __constant_cpu_to_be16(ETH_P_AOE);
215 h->major = cpu_to_be16(aoemajor);
222 read_unlock(&dev_base_lock);
229 static struct frame *
230 freeframe(struct aoedev *d)
238 if (f->tag != FREETAG)
240 if (atomic_read(&skb_shinfo(f->skb)->dataref) == 1) {
241 skb_shinfo(f->skb)->nr_frags = f->skb->data_len = 0;
246 if (n == d->nframes) /* wait for network layer */
247 d->flags |= DEVFL_KICKME;
252 /* enters with d->lock held */
254 aoecmd_work(struct aoedev *d)
259 if (d->flags & DEVFL_PAUSE) {
260 if (!aoedev_isbusy(d))
261 d->sendq_hd = aoecmd_cfg_pkts(d->aoemajor,
262 d->aoeminor, &d->sendq_tl);
270 if (d->inprocess == NULL) {
271 if (list_empty(&d->bufq))
273 buf = container_of(d->bufq.next, struct buf, bufs);
274 list_del(d->bufq.next);
275 /*dprintk("bi_size=%ld\n", buf->bio->bi_size); */
283 rexmit(struct aoedev *d, struct frame *f)
287 struct aoe_atahdr *ah;
293 snprintf(buf, sizeof buf,
294 "%15s e%ld.%ld oldtag=%08x@%08lx newtag=%08x\n",
296 d->aoemajor, d->aoeminor, f->tag, jiffies, n);
300 h = (struct aoe_hdr *) skb->mac.raw;
301 ah = (struct aoe_atahdr *) (h+1);
303 h->tag = cpu_to_be32(n);
304 memcpy(h->dst, d->addr, sizeof h->dst);
305 memcpy(h->src, d->ifp->dev_addr, sizeof h->src);
307 n = DEFAULTBCNT / 512;
310 if (ah->aflags & AOEAFL_WRITE) {
311 skb_fill_page_desc(skb, 0, virt_to_page(f->bufaddr),
312 offset_in_page(f->bufaddr), DEFAULTBCNT);
313 skb->len = sizeof *h + sizeof *ah + DEFAULTBCNT;
314 skb->data_len = DEFAULTBCNT;
316 if (++d->lostjumbo > (d->nframes << 1))
317 if (d->maxbcnt != DEFAULTBCNT) {
318 iprintk("e%ld.%ld: too many lost jumbo on %s - using 1KB frames.\n",
319 d->aoemajor, d->aoeminor, d->ifp->name);
320 d->maxbcnt = DEFAULTBCNT;
321 d->flags |= DEVFL_MAXBCNT;
326 skb = skb_clone(skb, GFP_ATOMIC);
330 d->sendq_tl->next = skb;
341 n = jiffies & 0xffff;
349 rexmit_timer(ulong vp)
354 register long timeout;
357 d = (struct aoedev *) vp;
360 /* timeout is always ~150% of the moving average */
362 timeout += timeout >> 1;
364 spin_lock_irqsave(&d->lock, flags);
366 if (d->flags & DEVFL_TKILL) {
367 spin_unlock_irqrestore(&d->lock, flags);
373 if (f->tag != FREETAG && tsince(f->tag) >= timeout) {
374 n = f->waited += timeout;
376 if (n > MAXWAIT) { /* waited too long. device failure. */
383 if (d->flags & DEVFL_KICKME) {
384 d->flags &= ~DEVFL_KICKME;
389 d->sendq_hd = d->sendq_tl = NULL;
393 d->rttavg = MAXTIMER;
396 d->timer.expires = jiffies + TIMERTICK;
397 add_timer(&d->timer);
399 spin_unlock_irqrestore(&d->lock, flags);
404 /* this function performs work that has been deferred until sleeping is OK
407 aoecmd_sleepwork(void *vp)
409 struct aoedev *d = (struct aoedev *) vp;
411 if (d->flags & DEVFL_GDALLOC)
414 if (d->flags & DEVFL_NEWSIZE) {
415 struct block_device *bd;
419 ssize = d->gd->capacity;
420 bd = bdget_disk(d->gd, 0);
423 mutex_lock(&bd->bd_inode->i_mutex);
424 i_size_write(bd->bd_inode, (loff_t)ssize<<9);
425 mutex_unlock(&bd->bd_inode->i_mutex);
428 spin_lock_irqsave(&d->lock, flags);
429 d->flags |= DEVFL_UP;
430 d->flags &= ~DEVFL_NEWSIZE;
431 spin_unlock_irqrestore(&d->lock, flags);
436 ataid_complete(struct aoedev *d, unsigned char *id)
441 /* word 83: command set supported */
442 n = le16_to_cpu(get_unaligned((__le16 *) &id[83<<1]));
444 /* word 86: command set/feature enabled */
445 n |= le16_to_cpu(get_unaligned((__le16 *) &id[86<<1]));
447 if (n & (1<<10)) { /* bit 10: LBA 48 */
448 d->flags |= DEVFL_EXT;
450 /* word 100: number lba48 sectors */
451 ssize = le64_to_cpu(get_unaligned((__le64 *) &id[100<<1]));
453 /* set as in ide-disk.c:init_idedisk_capacity */
454 d->geo.cylinders = ssize;
455 d->geo.cylinders /= (255 * 63);
459 d->flags &= ~DEVFL_EXT;
461 /* number lba28 sectors */
462 ssize = le32_to_cpu(get_unaligned((__le32 *) &id[60<<1]));
464 /* NOTE: obsolete in ATA 6 */
465 d->geo.cylinders = le16_to_cpu(get_unaligned((__le16 *) &id[54<<1]));
466 d->geo.heads = le16_to_cpu(get_unaligned((__le16 *) &id[55<<1]));
467 d->geo.sectors = le16_to_cpu(get_unaligned((__le16 *) &id[56<<1]));
470 if (d->ssize != ssize)
471 iprintk("%012llx e%lu.%lu v%04x has %llu sectors\n",
472 (unsigned long long)mac_addr(d->addr),
473 d->aoemajor, d->aoeminor,
474 d->fw_ver, (long long)ssize);
478 d->gd->capacity = ssize;
479 d->flags |= DEVFL_NEWSIZE;
481 if (d->flags & DEVFL_GDALLOC) {
482 eprintk("can't schedule work for e%lu.%lu, %s\n",
483 d->aoemajor, d->aoeminor,
484 "it's already on! This shouldn't happen.\n");
487 d->flags |= DEVFL_GDALLOC;
489 schedule_work(&d->work);
493 calc_rttavg(struct aoedev *d, int rtt)
502 else if (n > MAXTIMER)
504 d->mintimer += (n - d->mintimer) >> 1;
505 } else if (n < d->mintimer)
507 else if (n > MAXTIMER)
510 /* g == .25; cf. Congestion Avoidance and Control, Jacobson & Karels; 1988 */
516 aoecmd_ata_rsp(struct sk_buff *skb)
519 struct aoe_hdr *hin, *hout;
520 struct aoe_atahdr *ahin, *ahout;
529 hin = (struct aoe_hdr *) skb->mac.raw;
530 aoemajor = be16_to_cpu(hin->major);
531 d = aoedev_by_aoeaddr(aoemajor, hin->minor);
533 snprintf(ebuf, sizeof ebuf, "aoecmd_ata_rsp: ata response "
534 "for unknown device %d.%d\n",
535 aoemajor, hin->minor);
540 spin_lock_irqsave(&d->lock, flags);
542 n = be32_to_cpu(hin->tag);
545 calc_rttavg(d, -tsince(n));
546 spin_unlock_irqrestore(&d->lock, flags);
547 snprintf(ebuf, sizeof ebuf,
548 "%15s e%d.%d tag=%08x@%08lx\n",
550 be16_to_cpu(hin->major),
552 be32_to_cpu(hin->tag),
558 calc_rttavg(d, tsince(f->tag));
560 ahin = (struct aoe_atahdr *) (hin+1);
561 hout = (struct aoe_hdr *) f->skb->mac.raw;
562 ahout = (struct aoe_atahdr *) (hout+1);
565 if (ahout->cmdstat == WIN_IDENTIFY)
566 d->flags &= ~DEVFL_PAUSE;
567 if (ahin->cmdstat & 0xa9) { /* these bits cleared on success */
568 eprintk("ata error cmd=%2.2Xh stat=%2.2Xh from e%ld.%ld\n",
569 ahout->cmdstat, ahin->cmdstat,
570 d->aoemajor, d->aoeminor);
572 buf->flags |= BUFFL_FAIL;
574 n = ahout->scnt << 9;
575 switch (ahout->cmdstat) {
578 if (skb->len - sizeof *hin - sizeof *ahin < n) {
579 eprintk("runt data size in read. skb->len=%d\n",
581 /* fail frame f? just returning will rexmit. */
582 spin_unlock_irqrestore(&d->lock, flags);
585 memcpy(f->bufaddr, ahin+1, n);
591 put_lba(ahout, f->lba += ahout->scnt);
595 ahout->scnt = n >> 9;
596 if (ahout->aflags & AOEAFL_WRITE) {
597 skb_fill_page_desc(skb, 0,
598 virt_to_page(f->bufaddr),
599 offset_in_page(f->bufaddr), n);
600 skb->len = sizeof *hout + sizeof *ahout + n;
604 hout->tag = cpu_to_be32(f->tag);
606 skb = skb_clone(skb, GFP_ATOMIC);
607 spin_unlock_irqrestore(&d->lock, flags);
616 if (skb->len - sizeof *hin - sizeof *ahin < 512) {
617 iprintk("runt data size in ataid. skb->len=%d\n",
619 spin_unlock_irqrestore(&d->lock, flags);
622 ataid_complete(d, (char *) (ahin+1));
625 iprintk("unrecognized ata command %2.2Xh for %d.%d\n",
627 be16_to_cpu(hin->major),
633 buf->nframesout -= 1;
634 if (buf->nframesout == 0 && buf->resid == 0) {
635 unsigned long duration = jiffies - buf->start_time;
636 unsigned long n_sect = buf->bio->bi_size >> 9;
637 struct gendisk *disk = d->gd;
638 const int rw = bio_data_dir(buf->bio);
640 disk_stat_inc(disk, ios[rw]);
641 disk_stat_add(disk, ticks[rw], duration);
642 disk_stat_add(disk, sectors[rw], n_sect);
643 disk_stat_add(disk, io_ticks, duration);
644 n = (buf->flags & BUFFL_FAIL) ? -EIO : 0;
645 bio_endio(buf->bio, buf->bio->bi_size, n);
646 mempool_free(buf, d->bufpool);
655 d->sendq_hd = d->sendq_tl = NULL;
657 spin_unlock_irqrestore(&d->lock, flags);
662 aoecmd_cfg(ushort aoemajor, unsigned char aoeminor)
666 sl = aoecmd_cfg_pkts(aoemajor, aoeminor, NULL);
672 * Since we only call this in one place (and it only prepares one frame)
673 * we just return the skb. Usually we'd chain it up to the aoedev sendq.
675 static struct sk_buff *
676 aoecmd_ata_id(struct aoedev *d)
679 struct aoe_atahdr *ah;
685 eprintk("can't get a frame. This shouldn't happen.\n");
689 /* initialize the headers & frame */
691 h = (struct aoe_hdr *) skb->mac.raw;
692 ah = (struct aoe_atahdr *) (h+1);
694 memset(h, 0, ETH_ZLEN);
695 f->tag = aoehdr_atainit(d, h);
698 /* set up ata header */
700 ah->cmdstat = WIN_IDENTIFY;
705 d->rttavg = MAXTIMER;
706 d->timer.function = rexmit_timer;
708 return skb_clone(skb, GFP_ATOMIC);
712 aoecmd_cfg_rsp(struct sk_buff *skb)
716 struct aoe_cfghdr *ch;
717 ulong flags, sysminor, aoemajor;
719 enum { MAXFRAMES = 16 };
722 h = (struct aoe_hdr *) skb->mac.raw;
723 ch = (struct aoe_cfghdr *) (h+1);
726 * Enough people have their dip switches set backwards to
727 * warrant a loud message for this special case.
729 aoemajor = be16_to_cpu(h->major);
730 if (aoemajor == 0xfff) {
731 eprintk("Warning: shelf address is all ones. "
732 "Check shelf dip switches.\n");
736 sysminor = SYSMINOR(aoemajor, h->minor);
737 if (sysminor * AOE_PARTITIONS + AOE_PARTITIONS > MINORMASK) {
738 iprintk("e%ld.%d: minor number too large\n",
739 aoemajor, (int) h->minor);
743 n = be16_to_cpu(ch->bufcnt);
744 if (n > MAXFRAMES) /* keep it reasonable */
747 d = aoedev_by_sysminor_m(sysminor, n);
749 iprintk("device sysminor_m failure\n");
753 spin_lock_irqsave(&d->lock, flags);
755 /* permit device to migrate mac and network interface */
757 memcpy(d->addr, h->src, sizeof d->addr);
758 if (!(d->flags & DEVFL_MAXBCNT)) {
760 n -= sizeof (struct aoe_hdr) + sizeof (struct aoe_atahdr);
764 n = n ? n * 512 : DEFAULTBCNT;
765 if (n != d->maxbcnt) {
766 iprintk("e%ld.%ld: setting %d byte data frames on %s\n",
767 d->aoemajor, d->aoeminor, n, d->ifp->name);
772 /* don't change users' perspective */
773 if (d->nopen && !(d->flags & DEVFL_PAUSE)) {
774 spin_unlock_irqrestore(&d->lock, flags);
777 d->flags |= DEVFL_PAUSE; /* force pause */
778 d->mintimer = MINTIMER;
779 d->fw_ver = be16_to_cpu(ch->fwver);
781 /* check for already outstanding ataid */
782 sl = aoedev_isbusy(d) == 0 ? aoecmd_ata_id(d) : NULL;
784 spin_unlock_irqrestore(&d->lock, flags);