/*
 * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/sched.h>
#include <linux/spinlock.h>
#include <linux/idr.h>
#include <linux/pci.h>
#include <linux/io.h>
#include <linux/delay.h>
#include <linux/netdevice.h>
#include <linux/vmalloc.h>

#include "ipath_kernel.h"
#include "ipath_verbs.h"

static void ipath_update_pio_bufs(struct ipath_devdata *);

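/*
 * Note: ipath_get_unit_name() returns a pointer to a single static
 * buffer, so the result is overwritten by the next call and the
 * routine is not reentrant; callers must copy the string if it needs
 * to persist.
 */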
const char *ipath_get_unit_name(int unit)
{
        static char iname[16];
        snprintf(iname, sizeof iname, "infinipath%u", unit);
        return iname;
}

#define DRIVER_LOAD_MSG "QLogic " IPATH_DRV_NAME " loaded: "
#define PFX IPATH_DRV_NAME ": "

/*
 * The size has to be longer than this string, so we can append
 * board/chip information to it in the init code.
 */
const char ib_ipath_version[] = IPATH_IDSTR "\n";

static struct idr unit_table;
DEFINE_SPINLOCK(ipath_devs_lock);
LIST_HEAD(ipath_dev_list);

wait_queue_head_t ipath_state_wait;

unsigned ipath_debug = __IPATH_INFO;

module_param_named(debug, ipath_debug, uint, S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(debug, "mask for debug prints");
EXPORT_SYMBOL_GPL(ipath_debug);

unsigned ipath_mtu4096 = 1; /* max 4KB IB mtu by default, if supported */
module_param_named(mtu4096, ipath_mtu4096, uint, S_IRUGO);
MODULE_PARM_DESC(mtu4096, "enable MTU of 4096 bytes, if supported");

static unsigned ipath_hol_timeout_ms = 13000;
module_param_named(hol_timeout_ms, ipath_hol_timeout_ms, uint, S_IRUGO);
MODULE_PARM_DESC(hol_timeout_ms,
        "duration of user app suspension after link failure");

unsigned ipath_linkrecovery = 1;
module_param_named(linkrecovery, ipath_linkrecovery, uint, S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(linkrecovery, "enable workaround for link recovery issue");

MODULE_LICENSE("GPL");
MODULE_AUTHOR("QLogic <support@qlogic.com>");
MODULE_DESCRIPTION("QLogic InfiniPath driver");

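/*
 * Illustrative usage (a sketch, not part of the driver): assuming the
 * module name is ib_ipath (IPATH_DRV_NAME), the parameters above can
 * be set at load time, e.g.
 *
 *      modprobe ib_ipath debug=0x2 mtu4096=0
 *
 * debug and linkrecovery are declared S_IWUSR, so root can also change
 * them at runtime via /sys/module/<module>/parameters/.
 */
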
/*
 * Table to translate the LINKTRAININGSTATE portion of
 * IBCStatus to a human-readable form.
 */
const char *ipath_ibcstatus_str[] = {
        "Disabled",
        "LinkUp",
        "PollActive",
        "PollQuiet",
        "SleepDelay",
        "SleepQuiet",
        "LState6",              /* unused */
        "LState7",              /* unused */
        "CfgDebounce",
        "CfgRcvfCfg",
        "CfgWaitRmt",
        "CfgIdle",
        "RecovRetrain",
        "CfgTxRevLane",         /* unused before IBA7220 */
        "RecovWaitRmt",
        "RecovIdle",
        /* below were added for IBA7220 */
        "CfgEnhanced",
        "CfgTest",
        "CfgWaitRmtTest",
        "CfgWaitCfgEnhanced",
        "SendTS_T",
        "SendTstIdles",
        "RcvTS_T",
        "SendTst_TS1s",
        "LTState18", "LTState19", "LTState1A", "LTState1B",
        "LTState1C", "LTState1D", "LTState1E", "LTState1F"
};

static void __devexit ipath_remove_one(struct pci_dev *);
static int __devinit ipath_init_one(struct pci_dev *,
                                    const struct pci_device_id *);

/* Only needed for registration, nothing else needs this info */
#define PCI_VENDOR_ID_PATHSCALE 0x1fc1
#define PCI_VENDOR_ID_QLOGIC 0x1077
#define PCI_DEVICE_ID_INFINIPATH_HT 0xd
#define PCI_DEVICE_ID_INFINIPATH_PE800 0x10
#define PCI_DEVICE_ID_INFINIPATH_7220 0x7220

/* Number of seconds before our card status check...  */
#define STATUS_TIMEOUT 60

static const struct pci_device_id ipath_pci_tbl[] = {
        { PCI_DEVICE(PCI_VENDOR_ID_PATHSCALE, PCI_DEVICE_ID_INFINIPATH_HT) },
        { PCI_DEVICE(PCI_VENDOR_ID_PATHSCALE, PCI_DEVICE_ID_INFINIPATH_PE800) },
        { PCI_DEVICE(PCI_VENDOR_ID_QLOGIC, PCI_DEVICE_ID_INFINIPATH_7220) },
        { 0, }
};

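/*
 * Exporting the ID table lets userspace (depmod/udev) autoload this
 * driver when a matching PCI device is discovered.
 */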
MODULE_DEVICE_TABLE(pci, ipath_pci_tbl);

static struct pci_driver ipath_driver = {
        .name = IPATH_DRV_NAME,
        .probe = ipath_init_one,
        .remove = __devexit_p(ipath_remove_one),
        .id_table = ipath_pci_tbl,
        .driver = {
                .groups = ipath_driver_attr_groups,
        },
};

static inline void read_bars(struct ipath_devdata *dd, struct pci_dev *dev,
                             u32 *bar0, u32 *bar1)
{
        int ret;

        ret = pci_read_config_dword(dev, PCI_BASE_ADDRESS_0, bar0);
        if (ret)
                ipath_dev_err(dd, "failed to read bar0 before enable: "
                              "error %d\n", -ret);

        ret = pci_read_config_dword(dev, PCI_BASE_ADDRESS_1, bar1);
        if (ret)
                ipath_dev_err(dd, "failed to read bar1 before enable: "
                              "error %d\n", -ret);

        ipath_dbg("Read bar0 %x bar1 %x\n", *bar0, *bar1);
}

static void ipath_free_devdata(struct pci_dev *pdev,
                               struct ipath_devdata *dd)
{
        unsigned long flags;

        pci_set_drvdata(pdev, NULL);

        if (dd->ipath_unit != -1) {
                spin_lock_irqsave(&ipath_devs_lock, flags);
                idr_remove(&unit_table, dd->ipath_unit);
                list_del(&dd->ipath_list);
                spin_unlock_irqrestore(&ipath_devs_lock, flags);
        }
        vfree(dd);
}

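/*
 * Allocation uses the two-step idr API of this kernel vintage:
 * idr_pre_get() preallocates while it is still safe to sleep
 * (GFP_KERNEL), so that idr_get_new() can then run under
 * ipath_devs_lock with interrupts disabled.
 */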
static struct ipath_devdata *ipath_alloc_devdata(struct pci_dev *pdev)
{
        unsigned long flags;
        struct ipath_devdata *dd;
        int ret;

        if (!idr_pre_get(&unit_table, GFP_KERNEL)) {
                dd = ERR_PTR(-ENOMEM);
                goto bail;
        }

        dd = vmalloc(sizeof(*dd));
        if (!dd) {
                dd = ERR_PTR(-ENOMEM);
                goto bail;
        }
        memset(dd, 0, sizeof(*dd));
        dd->ipath_unit = -1;

        spin_lock_irqsave(&ipath_devs_lock, flags);

        ret = idr_get_new(&unit_table, dd, &dd->ipath_unit);
        if (ret < 0) {
                printk(KERN_ERR IPATH_DRV_NAME
                       ": Could not allocate unit ID: error %d\n", -ret);
                ipath_free_devdata(pdev, dd);
                dd = ERR_PTR(ret);
                goto bail_unlock;
        }

        dd->pcidev = pdev;
        pci_set_drvdata(pdev, dd);

        list_add(&dd->ipath_list, &ipath_dev_list);

bail_unlock:
        spin_unlock_irqrestore(&ipath_devs_lock, flags);

bail:
        return dd;
}

static inline struct ipath_devdata *__ipath_lookup(int unit)
{
        return idr_find(&unit_table, unit);
}

struct ipath_devdata *ipath_lookup(int unit)
{
        struct ipath_devdata *dd;
        unsigned long flags;

        spin_lock_irqsave(&ipath_devs_lock, flags);
        dd = __ipath_lookup(unit);
        spin_unlock_irqrestore(&ipath_devs_lock, flags);

        return dd;
}

int ipath_count_units(int *npresentp, int *nupp, int *maxportsp)
{
        int nunits, npresent, nup;
        struct ipath_devdata *dd;
        unsigned long flags;
        int maxports;

        nunits = npresent = nup = maxports = 0;

        spin_lock_irqsave(&ipath_devs_lock, flags);

        list_for_each_entry(dd, &ipath_dev_list, ipath_list) {
                nunits++;
                if ((dd->ipath_flags & IPATH_PRESENT) && dd->ipath_kregbase)
                        npresent++;
                if (dd->ipath_lid &&
                    !(dd->ipath_flags & (IPATH_DISABLED | IPATH_LINKDOWN
                                         | IPATH_LINKUNK)))
                        nup++;
                if (dd->ipath_cfgports > maxports)
                        maxports = dd->ipath_cfgports;
        }

        spin_unlock_irqrestore(&ipath_devs_lock, flags);

        if (npresentp)
                *npresentp = npresent;
        if (nupp)
                *nupp = nup;
        if (maxportsp)
                *maxportsp = maxports;

        return nunits;
}

/*
 * These next two routines are placeholders in case we don't have per-arch
 * code for controlling write combining.  If explicit control of write
 * combining is not available, performance will probably be awful.
 */

int __attribute__((weak)) ipath_enable_wc(struct ipath_devdata *dd)
{
        return -EOPNOTSUPP;
}

void __attribute__((weak)) ipath_disable_wc(struct ipath_devdata *dd)
{
}
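
/*
 * (Architectures with explicit write-combining control provide strong
 * definitions that override these weak stubs; in this driver that is
 * done by arch-specific files such as ipath_wc_x86_64.c.)
 */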

/*
 * Perform a PIO buffer bandwidth write test, to verify proper system
 * configuration.  Even when all the setup calls work, occasionally
 * BIOS or other issues can prevent write combining from working, or
 * can cause other bandwidth problems to the chip.
 *
 * This test simply writes the same buffer over and over again, and
 * measures close to the peak bandwidth to the chip (not testing
 * data bandwidth to the wire).  On chips that use an address-based
 * trigger to send packets to the wire, this is easy.  On chips that
 * use a count to trigger, we want to make sure that the packet doesn't
 * go out on the wire, or trigger flow control checks.
 */
static void ipath_verify_pioperf(struct ipath_devdata *dd)
{
        u32 pbnum, cnt, lcnt;
        u32 __iomem *piobuf;
        u32 *addr;
        u64 msecs, emsecs;

        piobuf = ipath_getpiobuf(dd, 0, &pbnum);
        if (!piobuf) {
                dev_info(&dd->pcidev->dev,
                        "No PIObufs for checking perf, skipping\n");
                return;
        }

        /*
         * Enough to give us a reasonable test, less than piobuf size, and
         * likely multiple of store buffer length.
         */
        cnt = 1024;

        addr = vmalloc(cnt);
        if (!addr) {
                dev_info(&dd->pcidev->dev,
                        "Couldn't get memory for checking PIO perf,"
                        " skipping\n");
                goto done;
        }

        preempt_disable();  /* we want reasonably accurate elapsed time */
        msecs = 1 + jiffies_to_msecs(jiffies);
        for (lcnt = 0; lcnt < 10000U; lcnt++) {
                /* wait until we cross msec boundary */
                if (jiffies_to_msecs(jiffies) >= msecs)
                        break;
                udelay(1);
        }

        ipath_disable_armlaunch(dd);

        /*
         * length 0, no dwords actually sent, and mark as VL15
         * on chips where that may matter (due to IB flowcontrol)
         */
        if ((dd->ipath_flags & IPATH_HAS_PBC_CNT))
                writeq(1UL << 63, piobuf);
        else
                writeq(0, piobuf);
        ipath_flush_wc();

        /*
         * this is only roughly accurate, since even with preempt we
         * still take interrupts that could take a while.  Running for
         * >= 5 msec seems to get us "close enough" to accurate values.
         */
        msecs = jiffies_to_msecs(jiffies);
        for (emsecs = lcnt = 0; emsecs <= 5UL; lcnt++) {
                __iowrite32_copy(piobuf + 64, addr, cnt >> 2);
                emsecs = jiffies_to_msecs(jiffies) - msecs;
        }

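        /*
         * Each pass of the timing loop above copies cnt (1024) bytes,
         * so lcnt / emsecs is approximately MiB/sec: 1024 bytes per
         * pass times ~1000 msec per second is close to 2^20 bytes per
         * second.
         */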
        /* 1 GiB/sec, slightly over IB SDR line rate */
        if (lcnt < (emsecs * 1024U))
                ipath_dev_err(dd,
                        "Performance problem: bandwidth to PIO buffers is "
                        "only %u MiB/sec\n",
                        lcnt / (u32) emsecs);
        else
                ipath_dbg("PIO buffer bandwidth %u MiB/sec is OK\n",
                        lcnt / (u32) emsecs);

        preempt_enable();

        vfree(addr);

done:
        /* disarm piobuf, so it's available again */
        ipath_disarm_piobufs(dd, pbnum, 1);
        ipath_enable_armlaunch(dd);
}

static int __devinit ipath_init_one(struct pci_dev *pdev,
                                    const struct pci_device_id *ent)
{
        int ret, len, j;
        struct ipath_devdata *dd;
        unsigned long long addr;
        u32 bar0 = 0, bar1 = 0;
        u8 rev;

        dd = ipath_alloc_devdata(pdev);
        if (IS_ERR(dd)) {
                ret = PTR_ERR(dd);
                printk(KERN_ERR IPATH_DRV_NAME
                       ": Could not allocate devdata: error %d\n", -ret);
                goto bail;
        }

        ipath_cdbg(VERBOSE, "initializing unit #%u\n", dd->ipath_unit);

        ret = pci_enable_device(pdev);
        if (ret) {
                /* This can happen iff:
                 *
                 * We did a chip reset, and then failed to reprogram the
                 * BAR, or the chip reset due to an internal error.  We then
                 * unloaded the driver and reloaded it.
                 *
                 * Both reset cases set the BAR back to initial state.  For
                 * the latter case, the AER sticky error bit at offset 0x718
                 * should be set, but the Linux kernel doesn't yet know
                 * about that, it appears.  If the original BAR was retained
                 * in the kernel data structures, this may be OK.
                 */
                ipath_dev_err(dd, "enable unit %d failed: error %d\n",
                              dd->ipath_unit, -ret);
                goto bail_devdata;
        }
        addr = pci_resource_start(pdev, 0);
        len = pci_resource_len(pdev, 0);
        ipath_cdbg(VERBOSE, "regbase (0) %llx len %d irq %d, vend %x/%x "
                   "driver_data %lx\n", addr, len, pdev->irq, ent->vendor,
                   ent->device, ent->driver_data);

        read_bars(dd, pdev, &bar0, &bar1);

        if (!bar1 && !(bar0 & ~0xf)) {
                if (addr) {
                        dev_info(&pdev->dev, "BAR is 0 (probable RESET), "
                                 "rewriting as %llx\n", addr);
                        ret = pci_write_config_dword(
                                pdev, PCI_BASE_ADDRESS_0, addr);
                        if (ret) {
                                ipath_dev_err(dd, "rewrite of BAR0 "
                                              "failed: err %d\n", -ret);
                                goto bail_disable;
                        }
                        ret = pci_write_config_dword(
                                pdev, PCI_BASE_ADDRESS_1, addr >> 32);
                        if (ret) {
                                ipath_dev_err(dd, "rewrite of BAR1 "
                                              "failed: err %d\n", -ret);
                                goto bail_disable;
                        }
                } else {
                        ipath_dev_err(dd, "BAR is 0 (probable RESET), "
                                      "not usable until reboot\n");
                        ret = -ENODEV;
                        goto bail_disable;
                }
        }

        ret = pci_request_regions(pdev, IPATH_DRV_NAME);
        if (ret) {
                dev_info(&pdev->dev, "pci_request_regions unit %u fails: "
                         "err %d\n", dd->ipath_unit, -ret);
                goto bail_disable;
        }

        ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
        if (ret) {
                /*
                 * If the 64-bit setup fails, try 32-bit.  Some systems
                 * don't set up 64-bit mappings when 2GB or less memory
                 * is installed.
                 */
                ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
                if (ret) {
                        dev_info(&pdev->dev,
                                "Unable to set DMA mask for unit %u: %d\n",
                                dd->ipath_unit, ret);
                        goto bail_regions;
                } else {
                        ipath_dbg("No 64bit DMA mask, used 32 bit mask\n");
                        ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
                        if (ret)
                                dev_info(&pdev->dev,
                                        "Unable to set DMA consistent mask "
                                        "for unit %u: %d\n",
                                        dd->ipath_unit, ret);
                }
        } else {
                ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
                if (ret)
                        dev_info(&pdev->dev,
                                "Unable to set DMA consistent mask "
                                "for unit %u: %d\n",
                                dd->ipath_unit, ret);
        }
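
        /*
         * Note: pci_set_dma_mask() above governs streaming DMA mappings,
         * while pci_set_consistent_dma_mask() governs coherent
         * allocations such as the pioavail registers copy and the dummy
         * hdrq freed in cleanup_device().
         */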

        pci_set_master(pdev);

        /*
         * Save BARs to rewrite after device reset.  Save all 64 bits of
         * BAR, just in case.
         */
        dd->ipath_pcibar0 = addr;
        dd->ipath_pcibar1 = addr >> 32;
        dd->ipath_deviceid = ent->device;       /* save for later use */
        dd->ipath_vendorid = ent->vendor;

        /* setup the chip-specific functions, as early as possible. */
        switch (ent->device) {
        case PCI_DEVICE_ID_INFINIPATH_HT:
#ifdef CONFIG_HT_IRQ
                ipath_init_iba6110_funcs(dd);
                break;
#else
                ipath_dev_err(dd, "QLogic HT device 0x%x cannot work if "
                              "CONFIG_HT_IRQ is not enabled\n", ent->device);
                return -ENODEV;
#endif
        case PCI_DEVICE_ID_INFINIPATH_PE800:
#ifdef CONFIG_PCI_MSI
                ipath_init_iba6120_funcs(dd);
                break;
#else
                ipath_dev_err(dd, "QLogic PCIE device 0x%x cannot work if "
                              "CONFIG_PCI_MSI is not enabled\n", ent->device);
                return -ENODEV;
#endif
        case PCI_DEVICE_ID_INFINIPATH_7220:
#ifndef CONFIG_PCI_MSI
                ipath_dbg("CONFIG_PCI_MSI is not enabled, "
                          "using INTx for unit %u\n", dd->ipath_unit);
#endif
                ipath_init_iba7220_funcs(dd);
                break;
        default:
                ipath_dev_err(dd, "Found unknown QLogic deviceid 0x%x, "
                              "failing\n", ent->device);
                return -ENODEV;
        }

        for (j = 0; j < 6; j++) {
                if (!pdev->resource[j].start)
                        continue;
                ipath_cdbg(VERBOSE, "BAR %d start %llx, end %llx, len %llx\n",
                           j, (unsigned long long)pdev->resource[j].start,
                           (unsigned long long)pdev->resource[j].end,
                           (unsigned long long)pci_resource_len(pdev, j));
        }

        if (!addr) {
                ipath_dev_err(dd, "No valid address in BAR 0!\n");
                ret = -ENODEV;
                goto bail_regions;
        }

        ret = pci_read_config_byte(pdev, PCI_REVISION_ID, &rev);
        if (ret) {
                ipath_dev_err(dd, "Failed to read PCI revision ID unit "
                              "%u: err %d\n", dd->ipath_unit, -ret);
                goto bail_regions;      /* shouldn't ever happen */
        }
        dd->ipath_pcirev = rev;

#if defined(__powerpc__)
        /* There isn't a generic way to specify writethrough mappings */
        dd->ipath_kregbase = __ioremap(addr, len,
                (_PAGE_NO_CACHE|_PAGE_WRITETHRU));
#else
        dd->ipath_kregbase = ioremap_nocache(addr, len);
#endif

        if (!dd->ipath_kregbase) {
                ipath_dbg("Unable to map io addr %llx to kvirt, failing\n",
                          addr);
                ret = -ENOMEM;
                goto bail_iounmap;
        }
        dd->ipath_kregend = (u64 __iomem *)
                ((void __iomem *)dd->ipath_kregbase + len);
        dd->ipath_physaddr = addr;      /* used for io_remap, etc. */
        /* for user mmap */
        ipath_cdbg(VERBOSE, "mapped io addr %llx to kregbase %p\n",
                   addr, dd->ipath_kregbase);

        if (dd->ipath_f_bus(dd, pdev))
                ipath_dev_err(dd, "Failed to setup config space; "
                              "continuing anyway\n");

        /*
         * Set up our interrupt handler; IRQF_SHARED probably isn't
         * needed, since MSI interrupts shouldn't be shared, but it
         * won't hurt for now.  Check for irq 0 only after we return
         * from chip-specific bus setup, since that setup can change
         * the irq.
         */
        if (!dd->ipath_irq)
                ipath_dev_err(dd, "irq is 0, BIOS error?  Interrupts won't "
                              "work\n");
        else {
                ret = request_irq(dd->ipath_irq, ipath_intr, IRQF_SHARED,
                                  IPATH_DRV_NAME, dd);
                if (ret) {
                        ipath_dev_err(dd, "Couldn't setup irq handler, "
                                      "irq=%d: %d\n", dd->ipath_irq, ret);
                        goto bail_iounmap;
                }
        }

        ret = ipath_init_chip(dd, 0);   /* do the chip-specific init */
        if (ret)
                goto bail_irqsetup;

        ret = ipath_enable_wc(dd);

        if (ret) {
                ipath_dev_err(dd, "Write combining not enabled "
                              "(err %d): performance may be poor\n",
                              -ret);
                ret = 0;
        }

        ipath_verify_pioperf(dd);

        ipath_device_create_group(&pdev->dev, dd);
        ipathfs_add_device(dd);
        ipath_user_add(dd);
        ipath_diag_add(dd);
        ipath_register_ib_device(dd);

        goto bail;

bail_irqsetup:
        if (pdev->irq)
                free_irq(pdev->irq, dd);

bail_iounmap:
        iounmap((volatile void __iomem *) dd->ipath_kregbase);

bail_regions:
        pci_release_regions(pdev);

bail_disable:
        pci_disable_device(pdev);

bail_devdata:
        ipath_free_devdata(pdev, dd);

bail:
        return ret;
}

static void __devexit cleanup_device(struct ipath_devdata *dd)
{
        int port;
        struct ipath_portdata **tmp;
        unsigned long flags;

        if (*dd->ipath_statusp & IPATH_STATUS_CHIP_PRESENT) {
                /* can't do anything more with chip; needs re-init */
                *dd->ipath_statusp &= ~IPATH_STATUS_CHIP_PRESENT;
                if (dd->ipath_kregbase) {
                        /*
                         * If we haven't already cleaned up, clear these
                         * now, to ensure any register reads/writes "fail"
                         * until re-init.
                         */
                        dd->ipath_kregbase = NULL;
                        dd->ipath_uregbase = 0;
                        dd->ipath_sregbase = 0;
                        dd->ipath_cregbase = 0;
                        dd->ipath_kregsize = 0;
                }
                ipath_disable_wc(dd);
        }

        if (dd->ipath_spectriggerhit)
                dev_info(&dd->pcidev->dev, "%lu special trigger hits\n",
                         dd->ipath_spectriggerhit);

        if (dd->ipath_pioavailregs_dma) {
                dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE,
                                  (void *) dd->ipath_pioavailregs_dma,
                                  dd->ipath_pioavailregs_phys);
                dd->ipath_pioavailregs_dma = NULL;
        }
        if (dd->ipath_dummy_hdrq) {
                dma_free_coherent(&dd->pcidev->dev,
                        dd->ipath_pd[0]->port_rcvhdrq_size,
                        dd->ipath_dummy_hdrq, dd->ipath_dummy_hdrq_phys);
                dd->ipath_dummy_hdrq = NULL;
        }

        if (dd->ipath_pageshadow) {
                struct page **tmpp = dd->ipath_pageshadow;
                dma_addr_t *tmpd = dd->ipath_physshadow;
                int i, cnt = 0;

                ipath_cdbg(VERBOSE, "Unlocking any expTID pages still "
                           "locked\n");
                for (port = 0; port < dd->ipath_cfgports; port++) {
                        int port_tidbase = port * dd->ipath_rcvtidcnt;
                        int maxtid = port_tidbase + dd->ipath_rcvtidcnt;
                        for (i = port_tidbase; i < maxtid; i++) {
                                if (!tmpp[i])
                                        continue;
                                pci_unmap_page(dd->pcidev, tmpd[i],
                                        PAGE_SIZE, PCI_DMA_FROMDEVICE);
                                ipath_release_user_pages(&tmpp[i], 1);
                                tmpp[i] = NULL;
                                cnt++;
                        }
                }
                if (cnt) {
                        ipath_stats.sps_pageunlocks += cnt;
                        ipath_cdbg(VERBOSE, "There were still %u expTID "
                                   "entries locked\n", cnt);
                }
                if (ipath_stats.sps_pagelocks ||
                    ipath_stats.sps_pageunlocks)
                        ipath_cdbg(VERBOSE, "%llu pages locked, %llu "
                                   "unlocked via ipath_m{un}lock\n",
                                   (unsigned long long)
                                   ipath_stats.sps_pagelocks,
                                   (unsigned long long)
                                   ipath_stats.sps_pageunlocks);

                ipath_cdbg(VERBOSE, "Free shadow page tid array at %p\n",
                           dd->ipath_pageshadow);
                tmpp = dd->ipath_pageshadow;
                dd->ipath_pageshadow = NULL;
                vfree(tmpp);

                dd->ipath_egrtidbase = NULL;
        }

        /*
         * Free any resources still in use (usually just kernel ports)
         * at unload; we iterate over portcnt, because that's what we
         * allocate.  We acquire the lock to be really paranoid that
         * ipath_pd isn't being accessed from some interrupt-related
         * code (that should not happen, but best to be sure).
         */
        spin_lock_irqsave(&dd->ipath_uctxt_lock, flags);
        tmp = dd->ipath_pd;
        dd->ipath_pd = NULL;
        spin_unlock_irqrestore(&dd->ipath_uctxt_lock, flags);
        for (port = 0; port < dd->ipath_portcnt; port++) {
                struct ipath_portdata *pd = tmp[port];
                tmp[port] = NULL; /* debugging paranoia */
                ipath_free_pddata(dd, pd);
        }
        kfree(tmp);
}

static void __devexit ipath_remove_one(struct pci_dev *pdev)
{
        struct ipath_devdata *dd = pci_get_drvdata(pdev);

        ipath_cdbg(VERBOSE, "removing, pdev=%p, dd=%p\n", pdev, dd);

        /*
         * disable the IB link early, to be sure no new packets arrive,
         * which would complicate the shutdown process
         */
        ipath_shutdown_device(dd);

        flush_scheduled_work();

        if (dd->verbs_dev)
                ipath_unregister_ib_device(dd->verbs_dev);

        ipath_diag_remove(dd);
        ipath_user_remove(dd);
        ipathfs_remove_device(dd);
        ipath_device_remove_group(&pdev->dev, dd);

        ipath_cdbg(VERBOSE, "Releasing pci memory regions, dd %p, "
                   "unit %u\n", dd, (u32) dd->ipath_unit);

        cleanup_device(dd);

        /*
         * Turn off rcv, send, and interrupts for all ports.  (Should all
         * drivers also hard-reset the chip here?)  Free up port 0
         * (kernel) rcvhdr and egr bufs, and eventually TID bufs, for all
         * versions of the driver, if they were allocated.
         */
        if (dd->ipath_irq) {
                ipath_cdbg(VERBOSE, "unit %u free irq %d\n",
                           dd->ipath_unit, dd->ipath_irq);
                dd->ipath_f_free_irq(dd);
        } else
                ipath_dbg("irq is 0, not doing free_irq "
                          "for unit %u\n", dd->ipath_unit);
        /*
         * We check for NULL here, because it's outside
         * the kregbase check, and we need to call it
         * after the free_irq.  Thus it's possible that
         * the function pointers were never initialized.
         */
        if (dd->ipath_f_cleanup)
                /* clean up chip-specific stuff */
                dd->ipath_f_cleanup(dd);

        ipath_cdbg(VERBOSE, "Unmapping kregbase %p\n", dd->ipath_kregbase);
        iounmap((volatile void __iomem *) dd->ipath_kregbase);
        pci_release_regions(pdev);
        ipath_cdbg(VERBOSE, "calling pci_disable_device\n");
        pci_disable_device(pdev);

        ipath_free_devdata(pdev, dd);
}

/* general driver use */
DEFINE_MUTEX(ipath_mutex);

static DEFINE_SPINLOCK(ipath_pioavail_lock);

/**
 * ipath_disarm_piobufs - cancel a range of PIO buffers
 * @dd: the infinipath device
 * @first: the first PIO buffer to cancel
 * @cnt: the number of PIO buffers to cancel
 *
 * Cancel a range of PIO buffers, used when they might be armed, but
 * not triggered.  Used at init to ensure buffer state, and also at
 * user process close, in case it died while writing to a PIO buffer.
 * Also used after errors.
 */
void ipath_disarm_piobufs(struct ipath_devdata *dd, unsigned first,
                          unsigned cnt)
{
        unsigned i, last = first + cnt;
        unsigned long flags;

        ipath_cdbg(PKT, "disarm %u PIObufs first=%u\n", cnt, first);
        for (i = first; i < last; i++) {
                spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
                /*
                 * The disarm-related bits are write-only, so it
                 * is ok to OR them in with our copy of sendctrl
                 * while we hold the lock.
                 */
                ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
                        dd->ipath_sendctrl | INFINIPATH_S_DISARM |
                        (i << INFINIPATH_S_DISARMPIOBUF_SHIFT));
                /* can't disarm bufs back-to-back per iba7220 spec */
                ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
                spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
        }
        /* on some older chips, update may not happen after cancel */
        ipath_force_pio_avail_update(dd);
}

/**
 * ipath_wait_linkstate - wait for an IB link state change to occur
 * @dd: the infinipath device
 * @state: the state to wait for
 * @msecs: the number of milliseconds to wait
 *
 * Wait up to msecs milliseconds for an IB link state change to occur;
 * for now, take the easy polling route.  Currently used only by
 * ipath_set_linkstate.  Returns 0 if state reached, otherwise
 * -ETIMEDOUT.  state can have multiple state bits set, for any of
 * several transitions.
 */
int ipath_wait_linkstate(struct ipath_devdata *dd, u32 state, int msecs)
{
        dd->ipath_state_wanted = state;
        wait_event_interruptible_timeout(ipath_state_wait,
                                         (dd->ipath_flags & state),
                                         msecs_to_jiffies(msecs));
        dd->ipath_state_wanted = 0;

        if (!(dd->ipath_flags & state)) {
                u64 val;
                ipath_cdbg(VERBOSE, "Didn't reach linkstate %s within %u"
                           " ms\n",
                           /* test INIT ahead of DOWN, both can be set */
                           (state & IPATH_LINKINIT) ? "INIT" :
                           ((state & IPATH_LINKDOWN) ? "DOWN" :
                            ((state & IPATH_LINKARMED) ? "ARM" : "ACTIVE")),
                           msecs);
                val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus);
                ipath_cdbg(VERBOSE, "ibcc=%llx ibcstatus=%llx (%s)\n",
                           (unsigned long long) ipath_read_kreg64(
                                   dd, dd->ipath_kregs->kr_ibcctrl),
                           (unsigned long long) val,
                           ipath_ibcstatus_str[val & dd->ibcs_lts_mask]);
        }
        return (dd->ipath_flags & state) ? 0 : -ETIMEDOUT;
}

static void decode_sdma_errs(struct ipath_devdata *dd, ipath_err_t err,
        char *buf, size_t blen)
{
        static const struct {
                ipath_err_t err;
                const char *msg;
        } errs[] = {
                { INFINIPATH_E_SDMAGENMISMATCH, "SDmaGenMismatch" },
                { INFINIPATH_E_SDMAOUTOFBOUND, "SDmaOutOfBound" },
                { INFINIPATH_E_SDMATAILOUTOFBOUND, "SDmaTailOutOfBound" },
                { INFINIPATH_E_SDMABASE, "SDmaBase" },
                { INFINIPATH_E_SDMA1STDESC, "SDma1stDesc" },
                { INFINIPATH_E_SDMARPYTAG, "SDmaRpyTag" },
                { INFINIPATH_E_SDMADWEN, "SDmaDwEn" },
                { INFINIPATH_E_SDMAMISSINGDW, "SDmaMissingDw" },
                { INFINIPATH_E_SDMAUNEXPDATA, "SDmaUnexpData" },
                { INFINIPATH_E_SDMADESCADDRMISALIGN, "SDmaDescAddrMisalign" },
                { INFINIPATH_E_SENDBUFMISUSE, "SendBufMisuse" },
                { INFINIPATH_E_SDMADISABLED, "SDmaDisabled" },
        };
        int i;
        int expected;
        size_t bidx = 0;

        for (i = 0; i < ARRAY_SIZE(errs); i++) {
                expected = (errs[i].err != INFINIPATH_E_SDMADISABLED) ? 0 :
                        test_bit(IPATH_SDMA_ABORTING, &dd->ipath_sdma_status);
                if ((err & errs[i].err) && !expected)
                        bidx += snprintf(buf + bidx, blen - bidx,
                                         "%s ", errs[i].msg);
        }
}

/*
 * Decode the error status into strings, deciding whether to always
 * print it or not, depending on "normal packet errors" vs everything
 * else.  Return 1 if "real" errors, otherwise 0 if only packet
 * errors, so the caller can decide what to print with the string.
 */
int ipath_decode_err(struct ipath_devdata *dd, char *buf, size_t blen,
        ipath_err_t err)
{
        int iserr = 1;
        *buf = '\0';
        if (err & INFINIPATH_E_PKTERRS) {
                if (!(err & ~INFINIPATH_E_PKTERRS))
                        iserr = 0; /* if only packet errors */
                if (ipath_debug & __IPATH_ERRPKTDBG) {
                        if (err & INFINIPATH_E_REBP)
                                strlcat(buf, "EBP ", blen);
                        if (err & INFINIPATH_E_RVCRC)
                                strlcat(buf, "VCRC ", blen);
                        if (err & INFINIPATH_E_RICRC) {
                                strlcat(buf, "CRC ", blen);
                                /* clear for check below, so only once */
                                err &= INFINIPATH_E_RICRC;
                        }
                        if (err & INFINIPATH_E_RSHORTPKTLEN)
                                strlcat(buf, "rshortpktlen ", blen);
                        if (err & INFINIPATH_E_SDROPPEDDATAPKT)
                                strlcat(buf, "sdroppeddatapkt ", blen);
                        if (err & INFINIPATH_E_SPKTLEN)
                                strlcat(buf, "spktlen ", blen);
                }
                if ((err & INFINIPATH_E_RICRC) &&
                        !(err&(INFINIPATH_E_RVCRC|INFINIPATH_E_REBP)))
                        strlcat(buf, "CRC ", blen);
                if (!iserr)
                        goto done;
        }
        if (err & INFINIPATH_E_RHDRLEN)
                strlcat(buf, "rhdrlen ", blen);
        if (err & INFINIPATH_E_RBADTID)
                strlcat(buf, "rbadtid ", blen);
        if (err & INFINIPATH_E_RBADVERSION)
                strlcat(buf, "rbadversion ", blen);
        if (err & INFINIPATH_E_RHDR)
                strlcat(buf, "rhdr ", blen);
        if (err & INFINIPATH_E_SENDSPECIALTRIGGER)
                strlcat(buf, "sendspecialtrigger ", blen);
        if (err & INFINIPATH_E_RLONGPKTLEN)
                strlcat(buf, "rlongpktlen ", blen);
        if (err & INFINIPATH_E_RMAXPKTLEN)
                strlcat(buf, "rmaxpktlen ", blen);
        if (err & INFINIPATH_E_RMINPKTLEN)
                strlcat(buf, "rminpktlen ", blen);
        if (err & INFINIPATH_E_SMINPKTLEN)
                strlcat(buf, "sminpktlen ", blen);
        if (err & INFINIPATH_E_RFORMATERR)
                strlcat(buf, "rformaterr ", blen);
        if (err & INFINIPATH_E_RUNSUPVL)
                strlcat(buf, "runsupvl ", blen);
        if (err & INFINIPATH_E_RUNEXPCHAR)
                strlcat(buf, "runexpchar ", blen);
        if (err & INFINIPATH_E_RIBFLOW)
                strlcat(buf, "ribflow ", blen);
        if (err & INFINIPATH_E_SUNDERRUN)
                strlcat(buf, "sunderrun ", blen);
        if (err & INFINIPATH_E_SPIOARMLAUNCH)
                strlcat(buf, "spioarmlaunch ", blen);
        if (err & INFINIPATH_E_SUNEXPERRPKTNUM)
                strlcat(buf, "sunexperrpktnum ", blen);
        if (err & INFINIPATH_E_SDROPPEDSMPPKT)
                strlcat(buf, "sdroppedsmppkt ", blen);
        if (err & INFINIPATH_E_SMAXPKTLEN)
                strlcat(buf, "smaxpktlen ", blen);
        if (err & INFINIPATH_E_SUNSUPVL)
                strlcat(buf, "sunsupVL ", blen);
        if (err & INFINIPATH_E_INVALIDADDR)
                strlcat(buf, "invalidaddr ", blen);
        if (err & INFINIPATH_E_RRCVEGRFULL)
                strlcat(buf, "rcvegrfull ", blen);
        if (err & INFINIPATH_E_RRCVHDRFULL)
                strlcat(buf, "rcvhdrfull ", blen);
        if (err & INFINIPATH_E_IBSTATUSCHANGED)
                strlcat(buf, "ibcstatuschg ", blen);
        if (err & INFINIPATH_E_RIBLOSTLINK)
                strlcat(buf, "riblostlink ", blen);
        if (err & INFINIPATH_E_HARDWARE)
                strlcat(buf, "hardware ", blen);
        if (err & INFINIPATH_E_RESET)
                strlcat(buf, "reset ", blen);
        if (err & INFINIPATH_E_SDMAERRS)
                decode_sdma_errs(dd, err, buf, blen);
        if (err & INFINIPATH_E_INVALIDEEPCMD)
                strlcat(buf, "invalideepromcmd ", blen);
done:
        return iserr;
}
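
/*
 * Illustrative call pattern (a sketch only; the real callers live in
 * the interrupt handling code):
 *
 *      char msg[512];
 *
 *      if (ipath_decode_err(dd, msg, sizeof(msg), errs))
 *              ipath_dev_err(dd, "errors: %s\n", msg);
 *      else
 *              ipath_dbg("packet errors only: %s\n", msg);
 */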

/**
 * get_rhf_errstring - decode RHF errors
 * @err: the err number
 * @msg: the output buffer
 * @len: the length of the output buffer
 *
 * only used one place now, may want more later
 */
static void get_rhf_errstring(u32 err, char *msg, size_t len)
{
        /* start empty, so if there are no errors the caller sees "" */
        *msg = '\0';

        if (err & INFINIPATH_RHF_H_ICRCERR)
                strlcat(msg, "icrcerr ", len);
        if (err & INFINIPATH_RHF_H_VCRCERR)
                strlcat(msg, "vcrcerr ", len);
        if (err & INFINIPATH_RHF_H_PARITYERR)
                strlcat(msg, "parityerr ", len);
        if (err & INFINIPATH_RHF_H_LENERR)
                strlcat(msg, "lenerr ", len);
        if (err & INFINIPATH_RHF_H_MTUERR)
                strlcat(msg, "mtuerr ", len);
        if (err & INFINIPATH_RHF_H_IHDRERR)
                /* infinipath hdr checksum error */
                strlcat(msg, "ipathhdrerr ", len);
        if (err & INFINIPATH_RHF_H_TIDERR)
                strlcat(msg, "tiderr ", len);
        if (err & INFINIPATH_RHF_H_MKERR)
                /* bad port, offset, etc. */
                strlcat(msg, "invalid ipathhdr ", len);
        if (err & INFINIPATH_RHF_H_IBERR)
                strlcat(msg, "iberr ", len);
        if (err & INFINIPATH_RHF_L_SWA)
                strlcat(msg, "swA ", len);
        if (err & INFINIPATH_RHF_L_SWB)
                strlcat(msg, "swB ", len);
}

/**
 * ipath_get_egrbuf - get an eager buffer
 * @dd: the infinipath device
 * @bufnum: the eager buffer to get
 *
 * must only be called if ipath_pd[port] is known to be allocated
 */
static inline void *ipath_get_egrbuf(struct ipath_devdata *dd, u32 bufnum)
{
        return dd->ipath_port0_skbinfo ?
                (void *) dd->ipath_port0_skbinfo[bufnum].skb->data : NULL;
}

/**
 * ipath_alloc_skb - allocate an skb and buffer with possible constraints
 * @dd: the infinipath device
 * @gfp_mask: the sk_buff allocation (GFP) mask
 */
struct sk_buff *ipath_alloc_skb(struct ipath_devdata *dd,
                                gfp_t gfp_mask)
{
        struct sk_buff *skb;
        u32 len;

        /*
         * The only fully supported way to handle this is to allocate
         * lots of extra, align as needed, and then do skb_reserve().
         * That wastes a lot of memory...  I'll have to hack this into
         * infinipath_copy also.
         */

        /*
         * We need 2 extra bytes for ipath_ether data sent in the
         * key header.  In order to keep everything dword aligned,
         * we'll reserve 4 bytes.
         */
        len = dd->ipath_ibmaxlen + 4;

        if (dd->ipath_flags & IPATH_4BYTE_TID) {
                /* We need a 2KB multiple alignment, and there is no way
                 * to do it except to allocate extra and then skb_reserve
                 * enough to bring it up to the right alignment.
                 */
                len += 2047;
        }

        skb = __dev_alloc_skb(len, gfp_mask);
        if (!skb) {
                ipath_dev_err(dd, "Failed to allocate skbuff, length %u\n",
                              len);
                goto bail;
        }

        skb_reserve(skb, 4);

        if (dd->ipath_flags & IPATH_4BYTE_TID) {
                u32 una = (unsigned long)skb->data & 2047;
                if (una)
                        skb_reserve(skb, 2048 - una);
        }

bail:
        return skb;
}

static void ipath_rcv_hdrerr(struct ipath_devdata *dd,
                             u32 eflags,
                             u32 l,
                             u32 etail,
                             __le32 *rhf_addr,
                             struct ipath_message_header *hdr)
{
        char emsg[128];

        get_rhf_errstring(eflags, emsg, sizeof emsg);
        ipath_cdbg(PKT, "RHFerrs %x hdrqtail=%x typ=%u "
                   "tlen=%x opcode=%x egridx=%x: %s\n",
                   eflags, l,
                   ipath_hdrget_rcv_type(rhf_addr),
                   ipath_hdrget_length_in_bytes(rhf_addr),
                   be32_to_cpu(hdr->bth[0]) >> 24,
                   etail, emsg);

        /* Count local link integrity errors. */
        if (eflags & (INFINIPATH_RHF_H_ICRCERR | INFINIPATH_RHF_H_VCRCERR)) {
                u8 n = (dd->ipath_ibcctrl >>
                        INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT) &
                        INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK;

                if (++dd->ipath_lli_counter > n) {
                        dd->ipath_lli_counter = 0;
                        dd->ipath_lli_errors++;
                }
        }
}

/*
 * ipath_kreceive - receive a packet
 * @pd: the infinipath port
 *
 * called from interrupt handler for errors or receive interrupt
 */
void ipath_kreceive(struct ipath_portdata *pd)
{
        struct ipath_devdata *dd = pd->port_dd;
        __le32 *rhf_addr;
        void *ebuf;
        const u32 rsize = dd->ipath_rcvhdrentsize;      /* words */
        const u32 maxcnt = dd->ipath_rcvhdrcnt * rsize; /* words */
        u32 etail = -1, l, hdrqtail;
        struct ipath_message_header *hdr;
        u32 eflags, i, etype, tlen, pkttot = 0, updegr = 0, reloop = 0;
        static u64 totcalls;    /* stats, may eventually remove */
        int last;

        l = pd->port_head;
        rhf_addr = (__le32 *) pd->port_rcvhdrq + l + dd->ipath_rhf_offset;
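        /*
         * Two schemes for detecting new packets: chips with
         * IPATH_NODMA_RTAIL embed a rotating sequence number in each
         * RHF, which we compare against the expected value; older chips
         * DMA the tail pointer to memory, so we compare our software
         * head against that tail.  The smp_rmb() orders the tail read
         * ahead of reading the header contents it describes.
         */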
        if (dd->ipath_flags & IPATH_NODMA_RTAIL) {
                u32 seq = ipath_hdrget_seq(rhf_addr);

                if (seq != pd->port_seq_cnt)
                        goto bail;
                hdrqtail = 0;
        } else {
                hdrqtail = ipath_get_rcvhdrtail(pd);
                if (l == hdrqtail)
                        goto bail;
                smp_rmb();
        }

reloop:
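        /*
         * i counts packets handled in this call; "i += !last" stops the
         * increment on the final pass, so i is exact when it is added
         * to pkttot below.
         */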
        for (last = 0, i = 1; !last; i += !last) {
                hdr = dd->ipath_f_get_msgheader(dd, rhf_addr);
                eflags = ipath_hdrget_err_flags(rhf_addr);
                etype = ipath_hdrget_rcv_type(rhf_addr);
                /* total length */
                tlen = ipath_hdrget_length_in_bytes(rhf_addr);
                ebuf = NULL;
                if ((dd->ipath_flags & IPATH_NODMA_RTAIL) ?
                    ipath_hdrget_use_egr_buf(rhf_addr) :
                    (etype != RCVHQ_RCV_TYPE_EXPECTED)) {
                        /*
                         * It turns out that the chip uses an eager buffer
                         * for all non-expected packets, whether it "needs"
                         * one or not.  So always get the index, but don't
                         * set ebuf (so we try to copy data) unless the
                         * length requires it.
                         */
                        etail = ipath_hdrget_index(rhf_addr);
                        updegr = 1;
                        if (tlen > sizeof(*hdr) ||
                            etype == RCVHQ_RCV_TYPE_NON_KD)
                                ebuf = ipath_get_egrbuf(dd, etail);
                }

                /*
                 * Both tiderr and ipathhdrerr are set for all plain IB
                 * packets; only ipathhdrerr should be set.
                 */

                if (etype != RCVHQ_RCV_TYPE_NON_KD &&
                    etype != RCVHQ_RCV_TYPE_ERROR &&
                    ipath_hdrget_ipath_ver(hdr->iph.ver_port_tid_offset) !=
                    IPS_PROTO_VERSION)
                        ipath_cdbg(PKT, "Bad InfiniPath protocol version, "
                                   "rcv type %x\n", etype);

                if (unlikely(eflags))
                        ipath_rcv_hdrerr(dd, eflags, l, etail, rhf_addr, hdr);
                else if (etype == RCVHQ_RCV_TYPE_NON_KD) {
                        ipath_ib_rcv(dd->verbs_dev, (u32 *)hdr, ebuf, tlen);
                        if (dd->ipath_lli_counter)
                                dd->ipath_lli_counter--;
                } else if (etype == RCVHQ_RCV_TYPE_EAGER) {
                        u8 opcode = be32_to_cpu(hdr->bth[0]) >> 24;
                        u32 qp = be32_to_cpu(hdr->bth[1]) & 0xffffff;
                        ipath_cdbg(PKT, "typ %x, opcode %x (eager, "
                                   "qp=%x), len %x; ignored\n",
                                   etype, opcode, qp, tlen);
                } else if (etype == RCVHQ_RCV_TYPE_EXPECTED)
                        ipath_dbg("Bug: Expected TID, opcode %x; ignored\n",
                                  be32_to_cpu(hdr->bth[0]) >> 24);
                else {
                        /*
                         * Error packet, type of error unknown.
                         * Probably type 3, but we don't know, so don't
                         * even try to print the opcode, etc.
                         * Usually caused by a "bad packet", that has no
                         * BTH, when the LRH says it should.
                         */
                        ipath_cdbg(ERRPKT, "Error Pkt, but no eflags! egrbuf"
                                  " %x, len %x hdrq+%x rhf: %Lx\n",
                                  etail, tlen, l, (unsigned long long)
                                  le64_to_cpu(*(__le64 *) rhf_addr));
                        if (ipath_debug & __IPATH_ERRPKTDBG) {
                                u32 j, *d, dw = rsize-2;
                                if (rsize > (tlen>>2))
                                        dw = tlen>>2;
                                d = (u32 *)hdr;
                                printk(KERN_DEBUG "EPkt rcvhdr(%x dw):\n",
                                        dw);
                                for (j = 0; j < dw; j++)
                                        printk(KERN_DEBUG "%8x%s", d[j],
                                                (j%8) == 7 ? "\n" : " ");
                                printk(KERN_DEBUG ".\n");
                        }
                }
                l += rsize;
                if (l >= maxcnt)
                        l = 0;
                rhf_addr = (__le32 *) pd->port_rcvhdrq +
                        l + dd->ipath_rhf_offset;
                if (dd->ipath_flags & IPATH_NODMA_RTAIL) {
                        u32 seq = ipath_hdrget_seq(rhf_addr);

                        if (++pd->port_seq_cnt > 13)
                                pd->port_seq_cnt = 1;
                        if (seq != pd->port_seq_cnt)
                                last = 1;
                } else if (l == hdrqtail)
                        last = 1;
                /*
                 * Update head regs on last packet, and every 16 packets,
                 * to reduce bus traffic while still trying to prevent
                 * rcvhdrq overflows when the queue is nearly full.
                 */
                if (last || !(i & 0xf)) {
                        u64 lval = l;

                        /* request IBA6120 and 7220 interrupt only on last */
                        if (last)
                                lval |= dd->ipath_rhdrhead_intr_off;
                        ipath_write_ureg(dd, ur_rcvhdrhead, lval,
                                pd->port_port);
                        if (updegr) {
                                ipath_write_ureg(dd, ur_rcvegrindexhead,
                                                 etail, pd->port_port);
                                updegr = 0;
                        }
                }
        }

        if (!dd->ipath_rhdrhead_intr_off && !reloop &&
            !(dd->ipath_flags & IPATH_NODMA_RTAIL)) {
                /* IBA6110 workaround; we can have a race clearing chip
                 * interrupt with another interrupt about to be delivered,
                 * and can clear it before it is delivered on the GPIO
                 * workaround.  By doing the extra check here for the
                 * in-memory tail register updating while we were doing
                 * earlier packets, we "almost" guarantee we have covered
                 * that case.
                 */
                u32 hqtail = ipath_get_rcvhdrtail(pd);
                if (hqtail != hdrqtail) {
                        hdrqtail = hqtail;
                        reloop = 1; /* loop 1 extra time at most */
                        goto reloop;
                }
        }

        pkttot += i;

        pd->port_head = l;

        if (pkttot > ipath_stats.sps_maxpkts_call)
                ipath_stats.sps_maxpkts_call = pkttot;
        ipath_stats.sps_port0pkts += pkttot;
        ipath_stats.sps_avgpkts_call =
                ipath_stats.sps_port0pkts / ++totcalls;

bail:;
}
1346
1347 /**
1348  * ipath_update_pio_bufs - update shadow copy of the PIO availability map
1349  * @dd: the infinipath device
1350  *
1351  * Called whenever our local copy indicates we have run out of send buffers.
1352  * NOTE: this can be called from interrupt context by some code, and
1353  * from non-interrupt context by ipath_getpiobuf().
1354  */
1355
1356 static void ipath_update_pio_bufs(struct ipath_devdata *dd)
1357 {
1358         unsigned long flags;
1359         int i;
1360         const unsigned piobregs = (unsigned)dd->ipath_pioavregs;
1361
1362         /* If the generation (check) bits have changed, then we update the
1363          * busy bit for the corresponding PIO buffer.  This algorithm will
1364          * modify positions to the value they already have in some cases
1365          * (i.e., no change), but it's faster than changing only the bits
1366          * that have changed.
1367          *
1368          * We would like to do this atomically, to avoid spinlocks in the
1369          * critical send path, but that's not really possible, given the
1370          * type of changes, and that this routine could be called on
1371          * multiple CPUs simultaneously, so we lock in this routine only,
1372          * to avoid conflicting updates; all we change is the shadow, and
1373          * it's a single 64-bit memory location, so by definition the
1374          * update is atomic in terms of what other CPUs can see when
1375          * testing the bits.  The spin_lock overhead isn't too bad, since
1376          * it only happens when all buffers are in use, so only CPU
1377          * overhead, not latency or bandwidth, is affected.
1378          */
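             /*
              * Shadow layout sketch, inferred from the bit manipulation in
              * this file: each buffer i owns two bits per qword, a
              * generation ("check") bit at position 2*i and a busy bit at
              * position 2*i + 1 (i.e., the CHECK and BUSY shifts are 0 and
              * 1 within the buffer's bit pair).
              */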
1379         if (!dd->ipath_pioavailregs_dma) {
1380                 ipath_dbg("Update shadow pioavail, but regs_dma NULL!\n");
1381                 return;
1382         }
1383         if (ipath_debug & __IPATH_VERBDBG) {
1384                 /* only if packet debug and verbose */
1385                 volatile __le64 *dma = dd->ipath_pioavailregs_dma;
1386                 unsigned long *shadow = dd->ipath_pioavailshadow;
1387
1388                 ipath_cdbg(PKT, "Refill avail, dma0=%llx shad0=%lx, "
1389                            "d1=%llx s1=%lx, d2=%llx s2=%lx, d3=%llx "
1390                            "s3=%lx\n",
1391                            (unsigned long long) le64_to_cpu(dma[0]),
1392                            shadow[0],
1393                            (unsigned long long) le64_to_cpu(dma[1]),
1394                            shadow[1],
1395                            (unsigned long long) le64_to_cpu(dma[2]),
1396                            shadow[2],
1397                            (unsigned long long) le64_to_cpu(dma[3]),
1398                            shadow[3]);
1399                 if (piobregs > 4)
1400                         ipath_cdbg(
1401                                 PKT, "2nd group, dma4=%llx shad4=%lx, "
1402                                 "d5=%llx s5=%lx, d6=%llx s6=%lx, "
1403                                 "d7=%llx s7=%lx\n",
1404                                 (unsigned long long) le64_to_cpu(dma[4]),
1405                                 shadow[4],
1406                                 (unsigned long long) le64_to_cpu(dma[5]),
1407                                 shadow[5],
1408                                 (unsigned long long) le64_to_cpu(dma[6]),
1409                                 shadow[6],
1410                                 (unsigned long long) le64_to_cpu(dma[7]),
1411                                 shadow[7]);
1412         }
1413         spin_lock_irqsave(&ipath_pioavail_lock, flags);
1414         for (i = 0; i < piobregs; i++) {
1415                 u64 pchbusy, pchg, piov, pnew;
1416                 /*
1417                  * Chip Errata: bug 6641; even and odd qwords>3 are swapped
1418                  */
1419                 if (i > 3 && (dd->ipath_flags & IPATH_SWAP_PIOBUFS))
1420                         piov = le64_to_cpu(dd->ipath_pioavailregs_dma[i ^ 1]);
1421                 else
1422                         piov = le64_to_cpu(dd->ipath_pioavailregs_dma[i]);
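                     /*
                      * pchg selects kernel-owned generation-bit positions
                      * where the DMA copy agrees with the shadow (the
                      * shadow's generation bit is flipped at allocation
                      * time, so agreement indicates the chip has since
                      * updated); pchbusy maps those positions onto the busy
                      * bits, which are then refreshed from the DMA copy.
                      */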
1423                 pchg = dd->ipath_pioavailkernel[i] &
1424                         ~(dd->ipath_pioavailshadow[i] ^ piov);
1425                 pchbusy = pchg << INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT;
1426                 if (pchg && (pchbusy & dd->ipath_pioavailshadow[i])) {
1427                         pnew = dd->ipath_pioavailshadow[i] & ~pchbusy;
1428                         pnew |= piov & pchbusy;
1429                         dd->ipath_pioavailshadow[i] = pnew;
1430                 }
1431         }
1432         spin_unlock_irqrestore(&ipath_pioavail_lock, flags);
1433 }
1434
1435 /*
1436  * Used to force an update of pioavailshadow if we can't get a pio buffer.
1437  * Needed primarily when exiting freeze mode after recovering from
1438  * errors.  Done lazily, because it's safer (we are known not to be
1439  * writing pio buffers at that point).
1440  */
1441 static void ipath_reset_availshadow(struct ipath_devdata *dd)
1442 {
1443         int i, im;
1444         unsigned long flags;
1445
1446         spin_lock_irqsave(&ipath_pioavail_lock, flags);
1447         for (i = 0; i < dd->ipath_pioavregs; i++) {
1448                 u64 val, oldval;
1449                 /* deal with 6110 chip bug on high register #s */
1450                 im = (i > 3 && (dd->ipath_flags & IPATH_SWAP_PIOBUFS)) ?
1451                         i ^ 1 : i;
1452                 val = le64_to_cpu(dd->ipath_pioavailregs_dma[im]);
1453                 /*
1454                  * busy out the buffers not in the kernel avail list,
1455                  * without changing the generation bits.
1456                  */
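                     /*
                      * ~pioavailkernel, shifted to the busy-bit positions
                      * and masked with the alternating-ones constant, sets
                      * the busy bit for every buffer not on the kernel
                      * avail list while leaving the generation bits (the
                      * even positions) untouched.
                      */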
1457                 oldval = dd->ipath_pioavailshadow[i];
1458                 dd->ipath_pioavailshadow[i] = val |
1459                         ((~dd->ipath_pioavailkernel[i] <<
1460                         INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT) &
1461                         0xaaaaaaaaaaaaaaaaULL); /* All BUSY bits in qword */
1462                 if (oldval != dd->ipath_pioavailshadow[i])
1463                         ipath_dbg("shadow[%d] was %Lx, now %lx\n",
1464                                 i, (unsigned long long) oldval,
1465                                 dd->ipath_pioavailshadow[i]);
1466         }
1467         spin_unlock_irqrestore(&ipath_pioavail_lock, flags);
1468 }
1469
1470 /**
1471  * ipath_setrcvhdrsize - set the receive header size
1472  * @dd: the infinipath device
1473  * @rhdrsize: the receive header size
1474  *
1475  * called from user init code, and also layered driver init
1476  */
1477 int ipath_setrcvhdrsize(struct ipath_devdata *dd, unsigned rhdrsize)
1478 {
1479         int ret = 0;
1480
1481         if (dd->ipath_flags & IPATH_RCVHDRSZ_SET) {
1482                 if (dd->ipath_rcvhdrsize != rhdrsize) {
1483                         dev_info(&dd->pcidev->dev,
1484                                  "Error: can't set protocol header "
1485                                  "size %u, already %u\n",
1486                                  rhdrsize, dd->ipath_rcvhdrsize);
1487                         ret = -EAGAIN;
1488                 } else
1489                         ipath_cdbg(VERBOSE, "Reuse same protocol header "
1490                                    "size %u\n", dd->ipath_rcvhdrsize);
1491         } else if (rhdrsize > (dd->ipath_rcvhdrentsize -
1492                                (sizeof(u64) / sizeof(u32)))) {
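                     /*
                      * The sizeof(u64)/sizeof(u32) term reserves two dwords
                      * of each entry, apparently for the receive header
                      * flags (RHF) qword used earlier in this file, rather
                      * than protocol headers.
                      */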
1493                 ipath_dbg("Error: can't set protocol header size %u "
1494                           "(> max %u)\n", rhdrsize,
1495                           dd->ipath_rcvhdrentsize -
1496                           (u32) (sizeof(u64) / sizeof(u32)));
1497                 ret = -EOVERFLOW;
1498         } else {
1499                 dd->ipath_flags |= IPATH_RCVHDRSZ_SET;
1500                 dd->ipath_rcvhdrsize = rhdrsize;
1501                 ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvhdrsize,
1502                                  dd->ipath_rcvhdrsize);
1503                 ipath_cdbg(VERBOSE, "Set protocol header size to %u\n",
1504                            dd->ipath_rcvhdrsize);
1505         }
1506         return ret;
1507 }
1508
1509 /*
1510  * debugging code and stats updates if no pio buffers available.
1511  */
1512 static noinline void no_pio_bufs(struct ipath_devdata *dd)
1513 {
1514         unsigned long *shadow = dd->ipath_pioavailshadow;
1515         __le64 *dma = (__le64 *)dd->ipath_pioavailregs_dma;
1516
1517         dd->ipath_upd_pio_shadow = 1;
1518
1519         /*
1520          * not atomic, but if we lose a stat count in a while, that's OK
1521          */
1522         ipath_stats.sps_nopiobufs++;
1523         if (!(++dd->ipath_consec_nopiobuf % 100000)) {
1524                 ipath_force_pio_avail_update(dd); /* at start */
1525                 ipath_dbg("%u tries no piobufavail ts%lx; dmacopy: "
1526                         "%llx %llx %llx %llx\n"
1527                         "ipath  shadow:  %lx %lx %lx %lx\n",
1528                         dd->ipath_consec_nopiobuf,
1529                         (unsigned long)get_cycles(),
1530                         (unsigned long long) le64_to_cpu(dma[0]),
1531                         (unsigned long long) le64_to_cpu(dma[1]),
1532                         (unsigned long long) le64_to_cpu(dma[2]),
1533                         (unsigned long long) le64_to_cpu(dma[3]),
1534                         shadow[0], shadow[1], shadow[2], shadow[3]);
1535                 /*
1536                  * 2 bits per buffer = 4 buffers per byte; the 4 registers
1537                  * above cover the first 128 buffers, dump the rest below.
1538                  */
1539                 if ((dd->ipath_piobcnt2k + dd->ipath_piobcnt4k) >
1540                     (sizeof(shadow[0]) * 4 * 4))
1541                         ipath_dbg("2nd group: dmacopy: "
1542                                   "%llx %llx %llx %llx\n"
1543                                   "ipath  shadow:  %lx %lx %lx %lx\n",
1544                                   (unsigned long long)le64_to_cpu(dma[4]),
1545                                   (unsigned long long)le64_to_cpu(dma[5]),
1546                                   (unsigned long long)le64_to_cpu(dma[6]),
1547                                   (unsigned long long)le64_to_cpu(dma[7]),
1548                                   shadow[4], shadow[5], shadow[6], shadow[7]);
1549
1550                 /* at end, so update likely happened */
1551                 ipath_reset_availshadow(dd);
1552         }
1553 }
1554
1555 /*
1556  * common code for normal driver pio buffer allocation, and reserved
1557  * allocation; does appropriate marking as busy, etc.
1558  *
1559  * Returns a pointer to the buffer on success (with the buffer number
1560  * stored through pbufnum), or NULL if none is available.
1561  */
1562 static u32 __iomem *ipath_getpiobuf_range(struct ipath_devdata *dd,
1563         u32 *pbufnum, u32 first, u32 last, u32 firsti)
1564 {
1565         int i, j, updated = 0;
1566         unsigned piobcnt;
1567         unsigned long flags;
1568         unsigned long *shadow = dd->ipath_pioavailshadow;
1569         u32 __iomem *buf;
1570
1571         piobcnt = last - first;
1572         if (dd->ipath_upd_pio_shadow) {
1573                 /*
1574                  * Minor optimization.  If we had no buffers on the last
1575                  * call, start out by doing the update; then scan even if
1576                  * no buffers were updated, to be paranoid.
1577                  */
1578                 ipath_update_pio_bufs(dd);
1579                 updated++;
1580                 i = first;
1581         } else
1582                 i = firsti;
1583 rescan:
1584         /*
1585          * While test_and_set_bit() is atomic, we do that and then the
1586          * change_bit(), and the pair is not, hence the lock.  See if
1587          * this is the cause of the remaining armlaunch errors.
1588          */
1589         spin_lock_irqsave(&ipath_pioavail_lock, flags);
1590         for (j = 0; j < piobcnt; j++, i++) {
1591                 if (i >= last)
1592                         i = first;
1593                 if (__test_and_set_bit((2 * i) + 1, shadow))
1594                         continue;
1595                 /* flip generation bit */
1596                 __change_bit(2 * i, shadow);
1597                 break;
1598         }
1599         spin_unlock_irqrestore(&ipath_pioavail_lock, flags);
1600
1601         if (j == piobcnt) {
1602                 if (!updated) {
1603                         /*
1604                          * First time through: shadow exhausted, but buffers
1605                          * may be available; try an update and then rescan.
1606                          */
1607                         ipath_update_pio_bufs(dd);
1608                         updated++;
1609                         i = first;
1610                         goto rescan;
1611                 } else if (updated == 1 && piobcnt <=
1612                         ((dd->ipath_sendctrl
1613                         >> INFINIPATH_S_UPDTHRESH_SHIFT) &
1614                         INFINIPATH_S_UPDTHRESH_MASK)) {
1615                         /*
1616                          * For chips supporting and using the update
1617                          * threshold, we need to force an update of the
1618                          * in-memory copy if the count is less than the
1619                          * threshold, then check one more time.
1620                          */
1621                         ipath_force_pio_avail_update(dd);
1622                         ipath_update_pio_bufs(dd);
1623                         updated++;
1624                         i = first;
1625                         goto rescan;
1626                 }
1627
1628                 no_pio_bufs(dd);
1629                 buf = NULL;
1630         } else {
1631                 if (i < dd->ipath_piobcnt2k)
1632                         buf = (u32 __iomem *) (dd->ipath_pio2kbase +
1633                                                i * dd->ipath_palign);
1634                 else
1635                         buf = (u32 __iomem *)
1636                                 (dd->ipath_pio4kbase +
1637                                  (i - dd->ipath_piobcnt2k) * dd->ipath_4kalign);
1638                 if (pbufnum)
1639                         *pbufnum = i;
1640         }
1641
1642         return buf;
1643 }
1644
1645 /**
1646  * ipath_getpiobuf - find an available pio buffer
1647  * @dd: the infinipath device
1648  * @plen: the size of the PIO buffer needed in 32-bit words
1649  * @pbufnum: the buffer number is placed here
1650  */
1651 u32 __iomem *ipath_getpiobuf(struct ipath_devdata *dd, u32 plen, u32 *pbufnum)
1652 {
1653         u32 __iomem *buf;
1654         u32 pnum, nbufs;
1655         u32 first, lasti;
1656
1657         if (plen + 1 >= IPATH_SMALLBUF_DWORDS) {
1658                 first = dd->ipath_piobcnt2k;
1659                 lasti = dd->ipath_lastpioindexl;
1660         } else {
1661                 first = 0;
1662                 lasti = dd->ipath_lastpioindex;
1663         }
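             /*
              * Requests needing at least a small buffer's worth of dwords
              * scan only the 4k buffers (indices ipath_piobcnt2k and up);
              * smaller requests start at 0 and may land in either pool.
              */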
1664         nbufs = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k;
1665         buf = ipath_getpiobuf_range(dd, &pnum, first, nbufs, lasti);
1666
1667         if (buf) {
1668                 /*
1669                  * Set the next starting place.  It's just an optimization;
1670                  * it doesn't matter who wins on this, so no locking.
1671                  */
1672                 if (plen + 1 >= IPATH_SMALLBUF_DWORDS)
1673                         dd->ipath_lastpioindexl = pnum + 1;
1674                 else
1675                         dd->ipath_lastpioindex = pnum + 1;
1676                 if (dd->ipath_upd_pio_shadow)
1677                         dd->ipath_upd_pio_shadow = 0;
1678                 if (dd->ipath_consec_nopiobuf)
1679                         dd->ipath_consec_nopiobuf = 0;
1680                 ipath_cdbg(VERBOSE, "Return piobuf%u %uk @ %p\n",
1681                            pnum, (pnum < dd->ipath_piobcnt2k) ? 2 : 4, buf);
1682                 if (pbufnum)
1683                         *pbufnum = pnum;
1684
1685         }
1686         return buf;
1687 }
1688
1689 /**
1690  * ipath_chg_pioavailkernel - change which send buffers are available for kernel
1691  * @dd: the infinipath device
1692  * @start: the starting send buffer number
1693  * @len: the number of send buffers
1694  * @avail: true if the buffers are available for kernel use, false otherwise
1695  */
1696 void ipath_chg_pioavailkernel(struct ipath_devdata *dd, unsigned start,
1697                               unsigned len, int avail)
1698 {
1699         unsigned long flags;
1700         unsigned end, cnt = 0, next;
1701
1702         /* There are two bits per send buffer (busy and generation) */
1703         start *= 2;
1704         end = start + len * 2;
1705
1706         spin_lock_irqsave(&ipath_pioavail_lock, flags);
1707         /* Set or clear the busy bit in the shadow. */
1708         while (start < end) {
1709                 if (avail) {
1710                         unsigned long dma;
1711                         int i, im;
1712                         /*
1713                          * the BUSY bit will never be set, because we disarm
1714                          * the user buffers before we hand them back to the
1715                          * kernel.  We do have to make sure the generation
1716                          * bit is set correctly in the shadow, since it could
1717                          * have changed many times while allocated to the
1718                          * user.  We can't use the bitmap functions on the
1719                          * full dma array because it is always little-endian,
1720                          * so we have to flip to host order first.
1721                          * BITS_PER_LONG is slightly wrong, since it's always
1722                          * 64 bits per register in the chip... we only run on
1723                          * 64-bit kernels, so that's OK.
1724                          */
1725                         /* deal with 6110 chip bug on high register #s */
1726                         i = start / BITS_PER_LONG;
1727                         im = (i > 3 && (dd->ipath_flags & IPATH_SWAP_PIOBUFS)) ?
1728                                 i ^ 1 : i;
1729                         __clear_bit(INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT
1730                                 + start, dd->ipath_pioavailshadow);
1731                         dma = (unsigned long) le64_to_cpu(
1732                                 dd->ipath_pioavailregs_dma[im]);
1733                         if (test_bit((INFINIPATH_SENDPIOAVAIL_CHECK_SHIFT
1734                                 + start) % BITS_PER_LONG, &dma))
1735                                 __set_bit(INFINIPATH_SENDPIOAVAIL_CHECK_SHIFT
1736                                         + start, dd->ipath_pioavailshadow);
1737                         else
1738                                 __clear_bit(INFINIPATH_SENDPIOAVAIL_CHECK_SHIFT
1739                                         + start, dd->ipath_pioavailshadow);
1740                         __set_bit(start, dd->ipath_pioavailkernel);
1741                 } else {
1742                         __set_bit(start + INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT,
1743                                 dd->ipath_pioavailshadow);
1744                         __clear_bit(start, dd->ipath_pioavailkernel);
1745                 }
1746                 start += 2;
1747         }
1748
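             /*
              * Count how many generation-bit positions are still set in
              * pioavailkernel, i.e. how many buffers remain available to
              * the kernel, to decide below whether the update threshold
              * must drop.
              */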
1749         if (dd->ipath_pioupd_thresh) {
1750                 end = 2 * (dd->ipath_piobcnt2k + dd->ipath_piobcnt4k);
1751                 next = find_first_bit(dd->ipath_pioavailkernel, end);
1752                 while (next < end) {
1753                         cnt++;
1754                         next = find_next_bit(dd->ipath_pioavailkernel, end,
1755                                         next + 1);
1756                 }
1757         }
1758         spin_unlock_irqrestore(&ipath_pioavail_lock, flags);
1759
1760         /*
1761          * When moving buffers from kernel to user, if the number assigned
1762          * to the user is less than the pio update threshold, and the
1763          * threshold is supported (cnt was computed > 0), drop the update
1764          * threshold so we update at least once per allocation.  In any
1765          * case, if the kernel buffers are less than the threshold, drop
1766          * it.  We don't bother increasing it once decreased, since it
1767          * would typically just cycle back and forth.  If we don't
1768          * decrease below the buffers in use, we can wait a long time for
1769          * an update, until some other context uses PIO buffers.
1770          */
1771         if (!avail && len < cnt)
1772                 cnt = len;
1773         if (cnt < dd->ipath_pioupd_thresh) {
1774                 dd->ipath_pioupd_thresh = cnt;
1775                 ipath_dbg("Decreased pio update threshold to %u\n",
1776                         dd->ipath_pioupd_thresh);
1777                 spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
1778                 dd->ipath_sendctrl &= ~(INFINIPATH_S_UPDTHRESH_MASK
1779                         << INFINIPATH_S_UPDTHRESH_SHIFT);
1780                 dd->ipath_sendctrl |= dd->ipath_pioupd_thresh
1781                         << INFINIPATH_S_UPDTHRESH_SHIFT;
1782                 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
1783                         dd->ipath_sendctrl);
1784                 spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
1785         }
1786 }
1787
1788 /**
1789  * ipath_create_rcvhdrq - create a receive header queue
1790  * @dd: the infinipath device
1791  * @pd: the port data
1792  *
1793  * this must be contiguous memory (from an i/o perspective), and must be
1794  * DMA'able (which means for some systems, it will go through an IOMMU,
1795  * or be forced into a low address range).
1796  */
1797 int ipath_create_rcvhdrq(struct ipath_devdata *dd,
1798                          struct ipath_portdata *pd)
1799 {
1800         int ret = 0;
1801
1802         if (!pd->port_rcvhdrq) {
1803                 dma_addr_t phys_hdrqtail;
1804                 gfp_t gfp_flags = GFP_USER | __GFP_COMP;
1805                 int amt = ALIGN(dd->ipath_rcvhdrcnt * dd->ipath_rcvhdrentsize *
1806                                 sizeof(u32), PAGE_SIZE);
1807
1808                 pd->port_rcvhdrq = dma_alloc_coherent(
1809                         &dd->pcidev->dev, amt, &pd->port_rcvhdrq_phys,
1810                         gfp_flags);
1811
1812                 if (!pd->port_rcvhdrq) {
1813                         ipath_dev_err(dd, "attempt to allocate %d bytes "
1814                                       "for port %u rcvhdrq failed\n",
1815                                       amt, pd->port_port);
1816                         ret = -ENOMEM;
1817                         goto bail;
1818                 }
1819
1820                 if (!(dd->ipath_flags & IPATH_NODMA_RTAIL)) {
1821                         pd->port_rcvhdrtail_kvaddr = dma_alloc_coherent(
1822                                 &dd->pcidev->dev, PAGE_SIZE, &phys_hdrqtail,
1823                                 GFP_KERNEL);
1824                         if (!pd->port_rcvhdrtail_kvaddr) {
1825                                 ipath_dev_err(dd, "attempt to allocate 1 page "
1826                                         "for port %u rcvhdrqtailaddr "
1827                                         "failed\n", pd->port_port);
1828                                 ret = -ENOMEM;
1829                                 dma_free_coherent(&dd->pcidev->dev, amt,
1830                                         pd->port_rcvhdrq,
1831                                         pd->port_rcvhdrq_phys);
1832                                 pd->port_rcvhdrq = NULL;
1833                                 goto bail;
1834                         }
1835                         pd->port_rcvhdrqtailaddr_phys = phys_hdrqtail;
1836                         ipath_cdbg(VERBOSE, "port %d hdrtailaddr, %llx "
1837                                    "physical\n", pd->port_port,
1838                                    (unsigned long long) phys_hdrqtail);
1839                 }
1840
1841                 pd->port_rcvhdrq_size = amt;
1842
1843                 ipath_cdbg(VERBOSE, "%d pages at %p (phys %lx) size=%lu "
1844                            "for port %u rcvhdr Q\n",
1845                            amt >> PAGE_SHIFT, pd->port_rcvhdrq,
1846                            (unsigned long) pd->port_rcvhdrq_phys,
1847                            (unsigned long) pd->port_rcvhdrq_size,
1848                            pd->port_port);
1849         }
1850         else
1851                 ipath_cdbg(VERBOSE, "reuse port %d rcvhdrq @%p %llx phys; "
1852                            "hdrtailaddr@%p %llx physical\n",
1853                            pd->port_port, pd->port_rcvhdrq,
1854                            (unsigned long long) pd->port_rcvhdrq_phys,
1855                            pd->port_rcvhdrtail_kvaddr, (unsigned long long)
1856                            pd->port_rcvhdrqtailaddr_phys);
1857
1858         /* clear for security and sanity on each use */
1859         memset(pd->port_rcvhdrq, 0, pd->port_rcvhdrq_size);
1860         if (pd->port_rcvhdrtail_kvaddr)
1861                 memset(pd->port_rcvhdrtail_kvaddr, 0, PAGE_SIZE);
1862
1863         /*
1864          * tell chip each time we init it, even if we are re-using previous
1865          * memory (we zero the register at process close)
1866          */
1867         ipath_write_kreg_port(dd, dd->ipath_kregs->kr_rcvhdrtailaddr,
1868                               pd->port_port, pd->port_rcvhdrqtailaddr_phys);
1869         ipath_write_kreg_port(dd, dd->ipath_kregs->kr_rcvhdraddr,
1870                               pd->port_port, pd->port_rcvhdrq_phys);
1871
1872 bail:
1873         return ret;
1874 }
1875
1876
1877 /*
1878  * Flush all sends that might be in the ready-to-send state, as well as
1879  * any that are in the process of being sent.  Used whenever we need to
1880  * be sure the send side is idle.  Cleans up all buffer state by
1881  * canceling all pio buffers, and issuing an abort, which cleans up
1882  * anything in the launch fifo.  The cancel is superfluous on some chip
1883  * versions, but it's safer to always do it.
1884  * PIOAvail bits are updated by the chip as if a normal send had happened.
1885  */
1886 void ipath_cancel_sends(struct ipath_devdata *dd, int restore_sendctrl)
1887 {
1888         unsigned long flags;
1889
1890         if (dd->ipath_flags & IPATH_IB_AUTONEG_INPROG) {
1891                 ipath_cdbg(VERBOSE, "Ignore while in autonegotiation\n");
1892                 goto bail;
1893         }
1894         /*
1895          * If we have SDMA, and it's not disabled, we have to kick off the
1896          * abort state machine, provided we aren't already aborting.
1897          * If we are already aborting SDMA (!DISABLED, but ABORTING), we
1898          * skip the rest of this routine; the abort is already in progress.
1899          */
1900         if (dd->ipath_flags & IPATH_HAS_SEND_DMA) {
1901                 int skip_cancel;
1902                 unsigned long *statp = &dd->ipath_sdma_status;
1903
1904                 spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
1905                 skip_cancel =
1906                         test_and_set_bit(IPATH_SDMA_ABORTING, statp)
1907                         && !test_bit(IPATH_SDMA_DISABLED, statp);
1908                 spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
1909                 if (skip_cancel)
1910                         goto bail;
1911         }
1912
1913         ipath_dbg("Cancelling all in-progress send buffers\n");
1914
1915         /* skip armlaunch errs for a while */
1916         dd->ipath_lastcancel = jiffies + HZ / 2;
1917
1918         /*
1919          * The abort bit is auto-clearing.  We also don't want pioavail
1920          * updates happening during this, and we don't want any other
1921          * sends going out, so turn those off for the duration.  We read
1922          * the scratch register to be sure the cancels and the abort
1923          * have taken effect in the chip.  Otherwise these two parts are
1924          * the same as ipath_force_pio_avail_update().
1925          */
1926         spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
1927         dd->ipath_sendctrl &= ~(INFINIPATH_S_PIOBUFAVAILUPD
1928                 | INFINIPATH_S_PIOENABLE);
1929         ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
1930                 dd->ipath_sendctrl | INFINIPATH_S_ABORT);
1931         ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
1932         spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
1933
1934         /* disarm all send buffers */
1935         ipath_disarm_piobufs(dd, 0,
1936                 dd->ipath_piobcnt2k + dd->ipath_piobcnt4k);
1937
1938         if (dd->ipath_flags & IPATH_HAS_SEND_DMA)
1939                 set_bit(IPATH_SDMA_DISARMED, &dd->ipath_sdma_status);
1940
1941         if (restore_sendctrl) {
1942                 /* else done by caller later if needed */
1943                 spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
1944                 dd->ipath_sendctrl |= INFINIPATH_S_PIOBUFAVAILUPD |
1945                         INFINIPATH_S_PIOENABLE;
1946                 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
1947                         dd->ipath_sendctrl);
1948                 /* and again, be sure all have hit the chip */
1949                 ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
1950                 spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
1951         }
1952
1953         if ((dd->ipath_flags & IPATH_HAS_SEND_DMA) &&
1954             !test_bit(IPATH_SDMA_DISABLED, &dd->ipath_sdma_status) &&
1955             test_bit(IPATH_SDMA_RUNNING, &dd->ipath_sdma_status)) {
1956                 spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
1957                 /* only wait so long for intr */
1958                 dd->ipath_sdma_abort_intr_timeout = jiffies + HZ;
1959                 dd->ipath_sdma_reset_wait = 200;
1960                 if (!test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status))
1961                         tasklet_hi_schedule(&dd->ipath_sdma_abort_task);
1962                 spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
1963         }
1964 bail:;
1965 }
1966
1967 /*
1968  * Force an update of in-memory copy of the pioavail registers, when
1969  * needed for any of a variety of reasons.  We read the scratch register
1970  * to make it highly likely that the update will have happened by the
1971  * time we return.  If already off (as in cancel_sends above), this
1972  * routine is a nop, on the assumption that the caller will "do the
1973  * right thing".
1974  */
1975 void ipath_force_pio_avail_update(struct ipath_devdata *dd)
1976 {
1977         unsigned long flags;
1978
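             /*
              * Toggling PIOBUFAVAILUPD off and back on prompts the chip to
              * re-DMA the pioavail registers; each scratch read flushes
              * the preceding write so the sequence reaches the chip in
              * order.
              */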
1979         spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
1980         if (dd->ipath_sendctrl & INFINIPATH_S_PIOBUFAVAILUPD) {
1981                 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
1982                         dd->ipath_sendctrl & ~INFINIPATH_S_PIOBUFAVAILUPD);
1983                 ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
1984                 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
1985                         dd->ipath_sendctrl);
1986                 ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
1987         }
1988         spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
1989 }
1990
1991 static void ipath_set_ib_lstate(struct ipath_devdata *dd, int linkcmd,
1992                                 int linitcmd)
1993 {
1994         u64 mod_wd;
1995         static const char *what[4] = {
1996                 [0] = "NOP",
1997                 [INFINIPATH_IBCC_LINKCMD_DOWN] = "DOWN",
1998                 [INFINIPATH_IBCC_LINKCMD_ARMED] = "ARMED",
1999                 [INFINIPATH_IBCC_LINKCMD_ACTIVE] = "ACTIVE"
2000         };
2001
2002         if (linitcmd == INFINIPATH_IBCC_LINKINITCMD_DISABLE) {
2003                 /*
2004                  * If we are told to disable, note that so link-recovery
2005                  * code does not attempt to bring us back up.
2006                  */
2007                 preempt_disable();
2008                 dd->ipath_flags |= IPATH_IB_LINK_DISABLED;
2009                 preempt_enable();
2010         } else if (linitcmd) {
2011                 /*
2012                  * Any other linkinitcmd will lead to LINKDOWN and then
2013                  * to INIT (if all is well), so clear flag to let
2014                  * link-recovery code attempt to bring us back up.
2015                  */
2016                 preempt_disable();
2017                 dd->ipath_flags &= ~IPATH_IB_LINK_DISABLED;
2018                 preempt_enable();
2019         }
2020
2021         mod_wd = (linkcmd << dd->ibcc_lc_shift) |
2022                 (linitcmd << INFINIPATH_IBCC_LINKINITCMD_SHIFT);
2023         ipath_cdbg(VERBOSE,
2024                 "Moving unit %u to %s (initcmd=0x%x), current ltstate is %s\n",
2025                 dd->ipath_unit, what[linkcmd], linitcmd,
2026                 ipath_ibcstatus_str[ipath_ib_linktrstate(dd,
2027                         ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus))]);
2028
2029         ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
2030                          dd->ipath_ibcctrl | mod_wd);
2031         /* read from chip so write is flushed */
2032         (void) ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus);
2033 }
2034
2035 int ipath_set_linkstate(struct ipath_devdata *dd, u8 newstate)
2036 {
2037         u32 lstate;
2038         int ret;
2039
2040         switch (newstate) {
2041         case IPATH_IB_LINKDOWN_ONLY:
2042                 ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_DOWN, 0);
2043                 /* don't wait */
2044                 ret = 0;
2045                 goto bail;
2046
2047         case IPATH_IB_LINKDOWN:
2048                 ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_DOWN,
2049                                         INFINIPATH_IBCC_LINKINITCMD_POLL);
2050                 /* don't wait */
2051                 ret = 0;
2052                 goto bail;
2053
2054         case IPATH_IB_LINKDOWN_SLEEP:
2055                 ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_DOWN,
2056                                         INFINIPATH_IBCC_LINKINITCMD_SLEEP);
2057                 /* don't wait */
2058                 ret = 0;
2059                 goto bail;
2060
2061         case IPATH_IB_LINKDOWN_DISABLE:
2062                 ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_DOWN,
2063                                         INFINIPATH_IBCC_LINKINITCMD_DISABLE);
2064                 /* don't wait */
2065                 ret = 0;
2066                 goto bail;
2067
2068         case IPATH_IB_LINKARM:
2069                 if (dd->ipath_flags & IPATH_LINKARMED) {
2070                         ret = 0;
2071                         goto bail;
2072                 }
2073                 if (!(dd->ipath_flags &
2074                       (IPATH_LINKINIT | IPATH_LINKACTIVE))) {
2075                         ret = -EINVAL;
2076                         goto bail;
2077                 }
2078                 ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_ARMED, 0);
2079
2080                 /*
2081                  * Since the port can transition to ACTIVE by receiving
2082                  * a non VL 15 packet, wait for either state.
2083                  */
2084                 lstate = IPATH_LINKARMED | IPATH_LINKACTIVE;
2085                 break;
2086
2087         case IPATH_IB_LINKACTIVE:
2088                 if (dd->ipath_flags & IPATH_LINKACTIVE) {
2089                         ret = 0;
2090                         goto bail;
2091                 }
2092                 if (!(dd->ipath_flags & IPATH_LINKARMED)) {
2093                         ret = -EINVAL;
2094                         goto bail;
2095                 }
2096                 ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_ACTIVE, 0);
2097                 lstate = IPATH_LINKACTIVE;
2098                 break;
2099
2100         case IPATH_IB_LINK_LOOPBACK:
2101                 dev_info(&dd->pcidev->dev, "Enabling IB local loopback\n");
2102                 dd->ipath_ibcctrl |= INFINIPATH_IBCC_LOOPBACK;
2103                 ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
2104                                  dd->ipath_ibcctrl);
2105
2106                 /* turn heartbeat off, as it causes loopback to fail */
2107                 dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_HRTBT,
2108                                        IPATH_IB_HRTBT_OFF);
2109                 /* don't wait */
2110                 ret = 0;
2111                 goto bail;
2112
2113         case IPATH_IB_LINK_EXTERNAL:
2114                 dev_info(&dd->pcidev->dev,
2115                         "Disabling IB local loopback (normal)\n");
2116                 dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_HRTBT,
2117                                        IPATH_IB_HRTBT_ON);
2118                 dd->ipath_ibcctrl &= ~INFINIPATH_IBCC_LOOPBACK;
2119                 ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
2120                                  dd->ipath_ibcctrl);
2121                 /* don't wait */
2122                 ret = 0;
2123                 goto bail;
2124
2125         /*
2126          * Heartbeat can be explicitly enabled by the user via the
2127          * "hrtbt_enable" file, and if disabled there, trying to enable it
2128          * here will have no effect.  Implicit changes (heartbeat off when
2129          * loopback on, and vice versa) are included to ease testing.
2130          */
2131         case IPATH_IB_LINK_HRTBT:
2132                 ret = dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_HRTBT,
2133                         IPATH_IB_HRTBT_ON);
2134                 goto bail;
2135
2136         case IPATH_IB_LINK_NO_HRTBT:
2137                 ret = dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_HRTBT,
2138                         IPATH_IB_HRTBT_OFF);
2139                 goto bail;
2140
2141         default:
2142                 ipath_dbg("Invalid linkstate 0x%x requested\n", newstate);
2143                 ret = -EINVAL;
2144                 goto bail;
2145         }
2146         ret = ipath_wait_linkstate(dd, lstate, 2000);
2147
2148 bail:
2149         return ret;
2150 }
2151
2152 /**
2153  * ipath_set_mtu - set the MTU
2154  * @dd: the infinipath device
2155  * @arg: the new MTU
2156  *
2157  * We can handle "any" incoming size; the issue here is whether we
2158  * need to restrict our outgoing size.  For now, we don't do any
2159  * sanity checking on this, and we don't deal with what happens to
2160  * programs that are already running when the size changes.
2161  * NOTE: changing the MTU will usually cause the IBC to go back to
2162  * the link INIT state...
2163  */
2164 int ipath_set_mtu(struct ipath_devdata *dd, u16 arg)
2165 {
2166         u32 piosize;
2167         int changed = 0;
2168         int ret;
2169
2170         /*
2171          * mtu is IB data payload max.  It's the largest power of 2 less
2172          * than piosize (or even larger, since it only really controls the
2173          * largest we can receive; we can send the max of the mtu and
2174          * piosize).  We check that it's one of the valid IB sizes.
2175          */
2176         if (arg != 256 && arg != 512 && arg != 1024 && arg != 2048 &&
2177             (arg != 4096 || !ipath_mtu4096)) {
2178                 ipath_dbg("Trying to set invalid mtu %u, failing\n", arg);
2179                 ret = -EINVAL;
2180                 goto bail;
2181         }
2182         if (dd->ipath_ibmtu == arg) {
2183                 ret = 0;        /* same as current */
2184                 goto bail;
2185         }
2186
2187         piosize = dd->ipath_ibmaxlen;
2188         dd->ipath_ibmtu = arg;
2189
2190         if (arg >= (piosize - IPATH_PIO_MAXIBHDR)) {
2191                 /* Only if it's not the initial value (or reset to it) */
2192                 if (piosize != dd->ipath_init_ibmaxlen) {
2193                         if (arg > piosize && arg <= dd->ipath_init_ibmaxlen)
2194                                 piosize = dd->ipath_init_ibmaxlen;
2195                         dd->ipath_ibmaxlen = piosize;
2196                         changed = 1;
2197                 }
2198         } else if ((arg + IPATH_PIO_MAXIBHDR) != dd->ipath_ibmaxlen) {
2199                 piosize = arg + IPATH_PIO_MAXIBHDR;
2200                 ipath_cdbg(VERBOSE, "ibmaxlen was 0x%x, setting to 0x%x "
2201                            "(mtu 0x%x)\n", dd->ipath_ibmaxlen, piosize,
2202                            arg);
2203                 dd->ipath_ibmaxlen = piosize;
2204                 changed = 1;
2205         }
2206
2207         if (changed) {
2208                 u64 ibc = dd->ipath_ibcctrl, ibdw;
2209                 /*
2210                  * update our housekeeping variables, and set IBC max
2211                  * size, same as init code; max IBC is max we allow in
2212                  * buffer, less the qword pbc, plus 1 for ICRC, in dwords
2213                  */
2214                 dd->ipath_ibmaxlen = piosize - 2 * sizeof(u32);
2215                 ibdw = (dd->ipath_ibmaxlen >> 2) + 1;
2216                 ibc &= ~(INFINIPATH_IBCC_MAXPKTLEN_MASK <<
2217                          dd->ibcc_mpl_shift);
2218                 ibc |= ibdw << dd->ibcc_mpl_shift;
2219                 dd->ipath_ibcctrl = ibc;
2220                 ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
2221                                  dd->ipath_ibcctrl);
2222                 dd->ipath_f_tidtemplate(dd);
2223         }
2224
2225         ret = 0;
2226
2227 bail:
2228         return ret;
2229 }
2230
2231 int ipath_set_lid(struct ipath_devdata *dd, u32 lid, u8 lmc)
2232 {
2233         dd->ipath_lid = lid;
2234         dd->ipath_lmc = lmc;
2235
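             /*
              * The low 16 bits carry the LID; the upper 16 carry the LMC
              * mask (all ones except the low lmc bits), so the chip can
              * treat any of the 2^lmc LIDs assigned to this port as ours.
              */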
2236         dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_LIDLMC, lid |
2237                 (~((1U << lmc) - 1)) << 16);
2238
2239         dev_info(&dd->pcidev->dev, "We got a lid: 0x%x\n", lid);
2240
2241         return 0;
2242 }
2243
2244
2245 /**
2246  * ipath_write_kreg_port - write a device's per-port 64-bit kernel register
2247  * @dd: the infinipath device
2248  * @regno: the register number to write
2249  * @port: the port containing the register
2250  * @value: the value to write
2251  *
2252  * Registers that vary with the chip implementation constants (port)
2253  * use this routine.
2254  */
2255 void ipath_write_kreg_port(const struct ipath_devdata *dd, ipath_kreg regno,
2256                           unsigned port, u64 value)
2257 {
2258         u16 where;
2259
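             /*
              * Only the per-port rcvhdr address registers are handled
              * here; their per-port instances are laid out consecutively,
              * so the instance offset is simply regno + port.
              */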
2260         if (port < dd->ipath_portcnt &&
2261             (regno == dd->ipath_kregs->kr_rcvhdraddr ||
2262              regno == dd->ipath_kregs->kr_rcvhdrtailaddr))
2263                 where = regno + port;
2264         else
2265                 where = -1;
2266
2267         ipath_write_kreg(dd, where, value);
2268 }
2269
2270 /*
2271  * The following routines deal with the "obviously simple" task of
2272  * overriding the state of the LEDs, which normally indicate link
2273  * physical and logical status.  The complications arise from the
2274  * different hardware mappings, the board-dependent routine being
2275  * called from interrupts, and the requirement to _flash_ them.
2276  */
2277 #define LED_OVER_FREQ_SHIFT 8
2278 #define LED_OVER_FREQ_MASK (0xFF<<LED_OVER_FREQ_SHIFT)
2279 /* Below is "non-zero" to force override, but both actual LEDs are off */
2280 #define LED_OVER_BOTH_OFF (8)
2281
2282 static void ipath_run_led_override(unsigned long opaque)
2283 {
2284         struct ipath_devdata *dd = (struct ipath_devdata *)opaque;
2285         int timeoff;
2286         int pidx;
2287         u64 lstate, ltstate, val;
2288
2289         if (!(dd->ipath_flags & IPATH_INITTED))
2290                 return;
2291
2292         pidx = dd->ipath_led_override_phase++ & 1;
2293         dd->ipath_led_override = dd->ipath_led_override_vals[pidx];
2294         timeoff = dd->ipath_led_override_timeoff;
2295
2296         /*
2297          * The code below potentially restores the LED values per the
2298          * current status; it could also set up the traffic-blink
2299          * register, but we leave that to the per-chip functions.
2300          */
2301         val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus);
2302         ltstate = ipath_ib_linktrstate(dd, val);
2303         lstate = ipath_ib_linkstate(dd, val);
2304
2305         dd->ipath_f_setextled(dd, lstate, ltstate);
2306         mod_timer(&dd->ipath_led_override_timer, jiffies + timeoff);
2307 }
2308
2309 void ipath_set_led_override(struct ipath_devdata *dd, unsigned int val)
2310 {
2311         int timeoff, freq;
2312
2313         if (!(dd->ipath_flags & IPATH_INITTED))
2314                 return;
2315
2316         /* First check if we are blinking.  If not, use 1 Hz polling. */
2317         timeoff = HZ;
2318         freq = (val & LED_OVER_FREQ_MASK) >> LED_OVER_FREQ_SHIFT;
2319
2320         if (freq) {
2321                 /* For blink, set each phase from one nybble of val */
2322                 dd->ipath_led_override_vals[0] = val & 0xF;
2323                 dd->ipath_led_override_vals[1] = (val >> 4) & 0xF;
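                     /* each blink phase then lasts (HZ << 4) / freq jiffies */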
2324                 timeoff = (HZ << 4)/freq;
2325         } else {
2326                 /* Non-blink: set both phases the same. */
2327                 dd->ipath_led_override_vals[0] = val & 0xF;
2328                 dd->ipath_led_override_vals[1] = val & 0xF;
2329         }
2330         dd->ipath_led_override_timeoff = timeoff;
2331
2332         /*
2333          * If the timer has not already been started, do so. Use a "quick"
2334          * timeout so the function will be called soon, to look at our request.
2335          */
2336         if (atomic_inc_return(&dd->ipath_led_override_timer_active) == 1) {
2337                 /* Need to start timer */
2338                 init_timer(&dd->ipath_led_override_timer);
2339                 dd->ipath_led_override_timer.function =
2340                                                  ipath_run_led_override;
2341                 dd->ipath_led_override_timer.data = (unsigned long) dd;
2342                 dd->ipath_led_override_timer.expires = jiffies + 1;
2343                 add_timer(&dd->ipath_led_override_timer);
2344         } else
2345                 atomic_dec(&dd->ipath_led_override_timer_active);
2346 }
2347
2348 /**
2349  * ipath_shutdown_device - shut down a device
2350  * @dd: the infinipath device
2351  *
2352  * This is called to make the device quiet when we are about to
2353  * unload the driver, and also when the device is administratively
2354  * disabled.   It does not free any data structures.
2355  * Everything it does has to be setup again by ipath_init_chip(dd,1)
2356  */
2357 void ipath_shutdown_device(struct ipath_devdata *dd)
2358 {
2359         unsigned long flags;
2360
2361         ipath_dbg("Shutting down the device\n");
2362
2363         ipath_hol_up(dd); /* make sure user processes aren't suspended */
2364
2365         dd->ipath_flags |= IPATH_LINKUNK;
2366         dd->ipath_flags &= ~(IPATH_INITTED | IPATH_LINKDOWN |
2367                              IPATH_LINKINIT | IPATH_LINKARMED |
2368                              IPATH_LINKACTIVE);
2369         *dd->ipath_statusp &= ~(IPATH_STATUS_IB_CONF |
2370                                 IPATH_STATUS_IB_READY);
2371
2372         /* mask interrupts, but not errors */
2373         ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, 0ULL);
2374
2375         dd->ipath_rcvctrl = 0;
2376         ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
2377                          dd->ipath_rcvctrl);
2378
2379         if (dd->ipath_flags & IPATH_HAS_SEND_DMA)
2380                 teardown_sdma(dd);
2381
2382         /*
2383          * Gracefully stop all sends, allowing any in progress to trickle
2384          * out first.
2385          */
2386         spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
2387         dd->ipath_sendctrl = 0;
2388         ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl);
2389         /* flush it */
2390         ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
2391         spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
2392
2393         /*
2394          * Wait long enough for anything that's going to trickle out to
2395          * have actually done so.
2396          */
2397         udelay(5);
2398
2399         dd->ipath_f_setextled(dd, 0, 0); /* make sure LEDs are off */
2400
2401         ipath_set_ib_lstate(dd, 0, INFINIPATH_IBCC_LINKINITCMD_DISABLE);
2402         ipath_cancel_sends(dd, 0);
2403
2404         /*
2405          * We are shutting down, so tell the components that care.  We
2406          * don't do this on a mere link state change; as with ethernet,
2407          * a cable unplug or the like doesn't change driver state.
2408          */
2409         signal_ib_event(dd, IB_EVENT_PORT_ERR);
2410
2411         /* disable IBC */
2412         dd->ipath_control &= ~INFINIPATH_C_LINKENABLE;
2413         ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
2414                          dd->ipath_control | INFINIPATH_C_FREEZEMODE);
2415
2416         /*
2417          * Clear SerdesEnable and turn the LEDs off; do this here because
2418          * we are unloading, so we can't count on interrupts to move
2419          * things along.  Turn the LEDs off explicitly for the same reason.
2420          */
2421         dd->ipath_f_quiet_serdes(dd);
2422
2423         /* stop all the timers that might still be running */
2424         del_timer_sync(&dd->ipath_hol_timer);
2425         if (dd->ipath_stats_timer_active) {
2426                 del_timer_sync(&dd->ipath_stats_timer);
2427                 dd->ipath_stats_timer_active = 0;
2428         }
2429         if (dd->ipath_intrchk_timer.data) {
2430                 del_timer_sync(&dd->ipath_intrchk_timer);
2431                 dd->ipath_intrchk_timer.data = 0;
2432         }
2433         if (atomic_read(&dd->ipath_led_override_timer_active)) {
2434                 del_timer_sync(&dd->ipath_led_override_timer);
2435                 atomic_set(&dd->ipath_led_override_timer_active, 0);
2436         }
2437
2438         /*
2439          * Clear all interrupts and errors, so that the next time the
2440          * driver is loaded or the device is enabled, we know that
2441          * whatever is set happened while we were unloaded.
2442          */
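             /*
              * MEMBISTFAILED is deliberately excluded from the clear,
              * presumably because memory BIST runs only at power-up, so
              * its result would otherwise be lost across driver reloads.
              */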
2443         ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear,
2444                          ~0ULL & ~INFINIPATH_HWE_MEMBISTFAILED);
2445         ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, -1LL);
2446         ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, -1LL);
2447
2448         ipath_cdbg(VERBOSE, "Flush time and errors to EEPROM\n");
2449         ipath_update_eeprom_log(dd);
2450 }
2451
2452 /**
2453  * ipath_free_pddata - free a port's allocated data
2454  * @dd: the infinipath device
2455  * @pd: the portdata structure
2456  *
2457  * free up any allocated data for a port
2458  * This should not touch anything that would affect a simultaneous
2459  * re-allocation of port data, because it is called after ipath_mutex
2460  * is released (and can be called from reinit as well).
2461  * It should never change any chip state, or global driver state.
2462  * (The only exception to global state is freeing the port0 port0_skbs.)
2463  */
2464 void ipath_free_pddata(struct ipath_devdata *dd, struct ipath_portdata *pd)
2465 {
2466         if (!pd)
2467                 return;
2468
2469         if (pd->port_rcvhdrq) {
2470                 ipath_cdbg(VERBOSE, "free closed port %d rcvhdrq @ %p "
2471                            "(size=%lu)\n", pd->port_port, pd->port_rcvhdrq,
2472                            (unsigned long) pd->port_rcvhdrq_size);
2473                 dma_free_coherent(&dd->pcidev->dev, pd->port_rcvhdrq_size,
2474                                   pd->port_rcvhdrq, pd->port_rcvhdrq_phys);
2475                 pd->port_rcvhdrq = NULL;
2476                 if (pd->port_rcvhdrtail_kvaddr) {
2477                         dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE,
2478                                          pd->port_rcvhdrtail_kvaddr,
2479                                          pd->port_rcvhdrqtailaddr_phys);
2480                         pd->port_rcvhdrtail_kvaddr = NULL;
2481                 }
2482         }
2483         if (pd->port_port && pd->port_rcvegrbuf) {
2484                 unsigned e;
2485
2486                 for (e = 0; e < pd->port_rcvegrbuf_chunks; e++) {
2487                         void *base = pd->port_rcvegrbuf[e];
2488                         size_t size = pd->port_rcvegrbuf_size;
2489
2490                         ipath_cdbg(VERBOSE, "egrbuf free(%p, %lu), "
2491                                    "chunk %u/%u\n", base,
2492                                    (unsigned long) size,
2493                                    e, pd->port_rcvegrbuf_chunks);
2494                         dma_free_coherent(&dd->pcidev->dev, size,
2495                                 base, pd->port_rcvegrbuf_phys[e]);
2496                 }
2497                 kfree(pd->port_rcvegrbuf);
2498                 pd->port_rcvegrbuf = NULL;
2499                 kfree(pd->port_rcvegrbuf_phys);
2500                 pd->port_rcvegrbuf_phys = NULL;
2501                 pd->port_rcvegrbuf_chunks = 0;
2502         } else if (pd->port_port == 0 && dd->ipath_port0_skbinfo) {
2503                 unsigned e;
2504                 struct ipath_skbinfo *skbinfo = dd->ipath_port0_skbinfo;
2505
2506                 dd->ipath_port0_skbinfo = NULL;
2507                 ipath_cdbg(VERBOSE, "free closed port %d "
2508                            "ipath_port0_skbinfo @ %p\n", pd->port_port,
2509                            skbinfo);
2510                 for (e = 0; e < dd->ipath_p0_rcvegrcnt; e++)
2511                         if (skbinfo[e].skb) {
2512                                 pci_unmap_single(dd->pcidev, skbinfo[e].phys,
2513                                                  dd->ipath_ibmaxlen,
2514                                                  PCI_DMA_FROMDEVICE);
2515                                 dev_kfree_skb(skbinfo[e].skb);
2516                         }
2517                 vfree(skbinfo);
2518         }
2519         kfree(pd->port_tid_pg_list);
2520         vfree(pd->subport_uregbase);
2521         vfree(pd->subport_rcvegrbuf);
2522         vfree(pd->subport_rcvhdr_base);
2523         kfree(pd);
2524 }
2525
2526 static int __init infinipath_init(void)
2527 {
2528         int ret;
2529
2530         if (ipath_debug & __IPATH_DBG)
2531                 printk(KERN_INFO DRIVER_LOAD_MSG "%s", ib_ipath_version);
2532
2533         /*
2534          * These must be called before the driver is registered with
2535          * the PCI subsystem.
2536          */
2537         idr_init(&unit_table);
2538         if (!idr_pre_get(&unit_table, GFP_KERNEL)) {
2539                 printk(KERN_ERR IPATH_DRV_NAME ": idr_pre_get() failed\n");
2540                 ret = -ENOMEM;
2541                 goto bail;
2542         }
2543
2544         ret = pci_register_driver(&ipath_driver);
2545         if (ret < 0) {
2546                 printk(KERN_ERR IPATH_DRV_NAME
2547                        ": Unable to register driver: error %d\n", -ret);
2548                 goto bail_unit;
2549         }
2550
2551         ret = ipath_init_ipathfs();
2552         if (ret < 0) {
2553                 printk(KERN_ERR IPATH_DRV_NAME ": Unable to create "
2554                        "ipathfs: error %d\n", -ret);
2555                 goto bail_pci;
2556         }
2557
2558         goto bail;
2559
2560 bail_pci:
2561         pci_unregister_driver(&ipath_driver);
2562
2563 bail_unit:
2564         idr_destroy(&unit_table);
2565
2566 bail:
2567         return ret;
2568 }
2569
2570 static void __exit infinipath_cleanup(void)
2571 {
2572         ipath_exit_ipathfs();
2573
2574         ipath_cdbg(VERBOSE, "Unregistering pci driver\n");
2575         pci_unregister_driver(&ipath_driver);
2576
2577         idr_destroy(&unit_table);
2578 }
2579
2580 /**
2581  * ipath_reset_device - reset the chip if possible
2582  * @unit: the device to reset
2583  *
2584  * Whether or not the reset succeeds, we attempt to re-initialize the chip
2585  * (that is, much like a driver unload/reload).  We clear the INITTED flag
2586  * so that the various entry points will fail until we reinitialize.  For
2587  * now, we only allow this if no user ports that use chip resources are open.
2588  */
2589 int ipath_reset_device(int unit)
2590 {
2591         int ret, i;
2592         struct ipath_devdata *dd = ipath_lookup(unit);
2593         unsigned long flags;
2594
2595         if (!dd) {
2596                 ret = -ENODEV;
2597                 goto bail;
2598         }
2599
2600         if (atomic_read(&dd->ipath_led_override_timer_active)) {
2601                 /* Need to stop LED timer, _then_ shut off LEDs */
2602                 del_timer_sync(&dd->ipath_led_override_timer);
2603                 atomic_set(&dd->ipath_led_override_timer_active, 0);
2604         }
2605
2606         /* Shut off LEDs after we are sure timer is not running */
2607         dd->ipath_led_override = LED_OVER_BOTH_OFF;
2608         dd->ipath_f_setextled(dd, 0, 0);
2609
2610         dev_info(&dd->pcidev->dev, "Reset on unit %u requested\n", unit);
2611
2612         if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_PRESENT)) {
2613                 dev_info(&dd->pcidev->dev, "Invalid unit number %u or "
2614                          "not initialized or not present\n", unit);
2615                 ret = -ENXIO;
2616                 goto bail;
2617         }
2618
2619         spin_lock_irqsave(&dd->ipath_uctxt_lock, flags);
2620         if (dd->ipath_pd)
2621                 for (i = 1; i < dd->ipath_cfgports; i++) {
2622                         if (!dd->ipath_pd[i] || !dd->ipath_pd[i]->port_cnt)
2623                                 continue;
2624                         spin_unlock_irqrestore(&dd->ipath_uctxt_lock, flags);
2625                         ipath_dbg("unit %u port %d is in use "
2626                                   "(PID %u cmd %s), can't reset\n",
2627                                   unit, i,
2628                                   pid_nr(dd->ipath_pd[i]->port_pid),
2629                                   dd->ipath_pd[i]->port_comm);
2630                         ret = -EBUSY;
2631                         goto bail;
2632                 }
2633         spin_unlock_irqrestore(&dd->ipath_uctxt_lock, flags);
2634
2635         if (dd->ipath_flags & IPATH_HAS_SEND_DMA)
2636                 teardown_sdma(dd);
2637
2638         dd->ipath_flags &= ~IPATH_INITTED;
2639         ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, 0ULL);
2640         ret = dd->ipath_f_reset(dd);
2641         if (ret == 1) {
2642                 ipath_dbg("Reinitializing unit %u after reset attempt\n",
2643                           unit);
2644                 ret = ipath_init_chip(dd, 1);
2645         } else
2646                 ret = -EAGAIN;
2647         if (ret)
2648                 ipath_dev_err(dd, "Reinitialize unit %u after "
2649                               "reset failed with %d\n", unit, ret);
2650         else
2651                 dev_info(&dd->pcidev->dev, "Reinitialized unit %u after "
2652                          "resetting\n", unit);
2653
2654 bail:
2655         return ret;
2656 }
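
/*
 * Editorial note: the in-use scan above drops ipath_uctxt_lock before the
 * ipath_dbg() that dereferences dd->ipath_pd[i], so the diagnostic print
 * can in principle race with that port being closed; the -EBUSY decision
 * itself is made while the lock is still held.
 */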
2657
2658 /*
2659  * Send a signal to all processes that have the driver open through
2660  * the normal interfaces (i.e., everything other than the diags
2661  * interface).  Returns the number of signalled processes.
2662  */
2663 static int ipath_signal_procs(struct ipath_devdata *dd, int sig)
2664 {
2665         int i, sub, any = 0;
2666         struct pid *pid;
2667         unsigned long flags;
2668
2669         if (!dd->ipath_pd)
2670                 return 0;
2671
2672         spin_lock_irqsave(&dd->ipath_uctxt_lock, flags);
2673         for (i = 1; i < dd->ipath_cfgports; i++) {
2674                 if (!dd->ipath_pd[i] || !dd->ipath_pd[i]->port_cnt)
2675                         continue;
2676                 pid = dd->ipath_pd[i]->port_pid;
2677                 if (!pid)
2678                         continue;
2679
2680                 dev_info(&dd->pcidev->dev, "context %d in use "
2681                           "(PID %u), sending signal %d\n",
2682                           i, pid_nr(pid), sig);
2683                 kill_pid(pid, sig, 1);
2684                 any++;
2685                 for (sub = 0; sub < INFINIPATH_MAX_SUBPORT; sub++) {
2686                         pid = dd->ipath_pd[i]->port_subpid[sub];
2687                         if (!pid)
2688                                 continue;
2689                         dev_info(&dd->pcidev->dev, "sub-context "
2690                                 "%d:%d in use (PID %u), sending "
2691                                 "signal %d\n", i, sub, pid_nr(pid), sig);
2692                         kill_pid(pid, sig, 1);
2693                         any++;
2694                 }
2695         }
2696         spin_unlock_irqrestore(&dd->ipath_uctxt_lock, flags);
2697         return any;
2698 }
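
/*
 * Editorial note: kill_pid() takes a struct pid reference rather than a
 * raw pid_t, so the signal reaches the process that actually opened the
 * port even if its numeric pid has since been recycled; the final
 * argument (priv = 1) marks the signal as kernel-generated, bypassing
 * the normal permission checks.
 */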
2699
2700 static void ipath_hol_signal_down(struct ipath_devdata *dd)
2701 {
2702         if (ipath_signal_procs(dd, SIGSTOP))
2703                 ipath_dbg("Stopped some processes\n");
2704         ipath_cancel_sends(dd, 1);
2705 }
2706
2707
2708 static void ipath_hol_signal_up(struct ipath_devdata *dd)
2709 {
2710         if (ipath_signal_procs(dd, SIGCONT))
2711                 ipath_dbg("Continued some processes\n");
2712 }
2713
2714 /*
2715  * The link is down: stop any user processes and flush pending sends
2716  * to prevent HoL blocking, then start the HoL timer, which
2717  * periodically continues and then re-stops the processes, so they
2718  * can detect the link going down and react to it if they wish.
2719  * The timer may already be running, so use mod_timer, not add_timer.
2720  */
2721 void ipath_hol_down(struct ipath_devdata *dd)
2722 {
2723         dd->ipath_hol_state = IPATH_HOL_DOWN;
2724         ipath_hol_signal_down(dd);
2725         dd->ipath_hol_next = IPATH_HOL_DOWNCONT;
2726         dd->ipath_hol_timer.expires = jiffies +
2727                 msecs_to_jiffies(ipath_hol_timeout_ms);
2728         mod_timer(&dd->ipath_hol_timer, dd->ipath_hol_timer.expires);
2729 }
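
/*
 * Editorial note: mod_timer(t, expires) sets t->expires itself, so the
 * explicit assignment above is redundant (though harmless).  The point
 * of using mod_timer() is that it safely (re)arms a timer whether or
 * not it is already pending, which add_timer() must not be asked to do.
 */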
2730
2731 /*
2732  * The link is up: continue any stopped user processes.  The timer is
2733  * left running if it is already set; it becomes a no-op once it sees
2734  * that the link is up.
2735  */
2736 void ipath_hol_up(struct ipath_devdata *dd)
2737 {
2738         ipath_hol_signal_up(dd);
2739         dd->ipath_hol_state = IPATH_HOL_UP;
2740 }
2741
2742 /*
2743  * Toggle the running/stopped state of user processes to prevent HoL
2744  * blocking on chip resources, while still allowing user processes to
2745  * do their link-down special-case handling.
2746  * Should only be called via the timer.
2747  */
2748 void ipath_hol_event(unsigned long opaque)
2749 {
2750         struct ipath_devdata *dd = (struct ipath_devdata *)opaque;
2751
2752         if (dd->ipath_hol_next == IPATH_HOL_DOWNSTOP
2753                 && dd->ipath_hol_state != IPATH_HOL_UP) {
2754                 dd->ipath_hol_next = IPATH_HOL_DOWNCONT;
2755                 ipath_dbg("Stopping processes\n");
2756                 ipath_hol_signal_down(dd);
2757         } else { /* may do "extra" if also in ipath_hol_up() */
2758                 dd->ipath_hol_next = IPATH_HOL_DOWNSTOP;
2759                 ipath_dbg("Continuing processes\n");
2760                 ipath_hol_signal_up(dd);
2761         }
2762         if (dd->ipath_hol_state == IPATH_HOL_UP)
2763                 ipath_dbg("link's up, don't resched timer\n");
2764         else {
2765                 dd->ipath_hol_timer.expires = jiffies +
2766                         msecs_to_jiffies(ipath_hol_timeout_ms);
2767                 mod_timer(&dd->ipath_hol_timer,
2768                         dd->ipath_hol_timer.expires);
2769         }
2770 }
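
/*
 * Editorial note: the timer alternates ipath_hol_next, so while the link
 * stays down user processes run for one timeout interval out of two:
 *
 *      ipath_hol_next on entry   action taken          next value
 *      IPATH_HOL_DOWNSTOP        stop processes        IPATH_HOL_DOWNCONT
 *      IPATH_HOL_DOWNCONT        continue processes    IPATH_HOL_DOWNSTOP
 */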
2771
2772 int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv)
2773 {
2774         u64 val;
2775
2776         if (new_pol_inv > INFINIPATH_XGXS_RX_POL_MASK)
2777                 return -1;
2778         if (dd->ipath_rx_pol_inv != new_pol_inv) {
2779                 dd->ipath_rx_pol_inv = new_pol_inv;
2780                 val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig);
2781                 val &= ~(INFINIPATH_XGXS_RX_POL_MASK <<
2782                          INFINIPATH_XGXS_RX_POL_SHIFT);
2783                 val |= ((u64)dd->ipath_rx_pol_inv) <<
2784                         INFINIPATH_XGXS_RX_POL_SHIFT;
2785                 ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val);
2786         }
2787         return 0;
2788 }
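
/*
 * Editorial sketch (not driver code): ipath_set_rx_pol_inv() above is an
 * instance of the standard read-modify-write update of a multi-bit
 * register field; the generic shape, with my_set_field as a hypothetical
 * helper:
 */
static u64 my_set_field(u64 regval, u64 mask, unsigned shift, u64 newval)
{
        regval &= ~(mask << shift);         /* clear the old field bits */
        regval |= (newval & mask) << shift; /* insert the new value     */
        return regval;
}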
2789
2790 /*
2791  * Disable and enable the armlaunch error.  Used for PIO bandwidth testing
2792  * on the 7220, which is count-based rather than trigger-based.  Safe for
2793  * the driver check, since that happens at init.  Not completely safe when
2794  * used for user-mode checking, since some error checking can be lost, but
2795  * it is not particularly risky, and only has problematic side-effects in
2796  * the face of very buggy user code.  There is no reference counting, but
2797  * that is also fine, given the intended use.
2798  */
2799 void ipath_enable_armlaunch(struct ipath_devdata *dd)
2800 {
2801         dd->ipath_lasterror &= ~INFINIPATH_E_SPIOARMLAUNCH;
2802         ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear,
2803                 INFINIPATH_E_SPIOARMLAUNCH);
2804         dd->ipath_errormask |= INFINIPATH_E_SPIOARMLAUNCH;
2805         ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
2806                 dd->ipath_errormask);
2807 }
2808
2809 void ipath_disable_armlaunch(struct ipath_devdata *dd)
2810 {
2811         /* clear from maskederrs so it is not re-enabled later */
2812         dd->ipath_maskederrs &= ~INFINIPATH_E_SPIOARMLAUNCH;
2813         dd->ipath_errormask &= ~INFINIPATH_E_SPIOARMLAUNCH;
2814         ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
2815                 dd->ipath_errormask);
2816 }
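
/*
 * Editorial note: the pair above is meant to bracket a count-based PIO
 * bandwidth test: ipath_disable_armlaunch() clears the error from both
 * the error mask and maskederrs, so the expected armlaunch errors are
 * neither reported nor re-enabled behind the test's back, and
 * ipath_enable_armlaunch() then clears any latched armlaunch error and
 * restores it in the error mask.
 */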
2817
2818 module_init(infinipath_init);
2819 module_exit(infinipath_cleanup);