]> nv-tegra.nvidia Code Review - linux-2.6.git/blob - drivers/pci/dmar.c
Merge branch 'agp-next' of git://git.kernel.org/pub/scm/linux/kernel/git/airlied...
[linux-2.6.git] / drivers / pci / dmar.c
1 /*
2  * Copyright (c) 2006, Intel Corporation.
3  *
4  * This program is free software; you can redistribute it and/or modify it
5  * under the terms and conditions of the GNU General Public License,
6  * version 2, as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope it will be useful, but WITHOUT
9  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
11  * more details.
12  *
13  * You should have received a copy of the GNU General Public License along with
14  * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15  * Place - Suite 330, Boston, MA 02111-1307 USA.
16  *
17  * Copyright (C) 2006-2008 Intel Corporation
18  * Author: Ashok Raj <ashok.raj@intel.com>
19  * Author: Shaohua Li <shaohua.li@intel.com>
20  * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
21  *
22  * This file implements early detection/parsing of Remapping Devices
23  * reported to OS through BIOS via DMA remapping reporting (DMAR) ACPI
24  * tables.
25  *
26  * These routines are used by both DMA-remapping and Interrupt-remapping
27  */
28
29 #include <linux/pci.h>
30 #include <linux/dmar.h>
31 #include <linux/iova.h>
32 #include <linux/intel-iommu.h>
33 #include <linux/timer.h>
34 #include <linux/irq.h>
35 #include <linux/interrupt.h>
36
37 #undef PREFIX
38 #define PREFIX "DMAR:"
39
40 /* No locks are needed as DMA remapping hardware unit
41  * list is constructed at boot time and hotplug of
42  * these units are not supported by the architecture.
43  */
44 LIST_HEAD(dmar_drhd_units);
45
46 static struct acpi_table_header * __initdata dmar_tbl;
47 static acpi_size dmar_tbl_size;
48
49 static void __init dmar_register_drhd_unit(struct dmar_drhd_unit *drhd)
50 {
51         /*
52          * add INCLUDE_ALL at the tail, so scan the list will find it at
53          * the very end.
54          */
55         if (drhd->include_all)
56                 list_add_tail(&drhd->list, &dmar_drhd_units);
57         else
58                 list_add(&drhd->list, &dmar_drhd_units);
59 }
60
61 static int __init dmar_parse_one_dev_scope(struct acpi_dmar_device_scope *scope,
62                                            struct pci_dev **dev, u16 segment)
63 {
64         struct pci_bus *bus;
65         struct pci_dev *pdev = NULL;
66         struct acpi_dmar_pci_path *path;
67         int count;
68
69         bus = pci_find_bus(segment, scope->bus);
70         path = (struct acpi_dmar_pci_path *)(scope + 1);
71         count = (scope->length - sizeof(struct acpi_dmar_device_scope))
72                 / sizeof(struct acpi_dmar_pci_path);
73
74         while (count) {
75                 if (pdev)
76                         pci_dev_put(pdev);
77                 /*
78                  * Some BIOSes list non-exist devices in DMAR table, just
79                  * ignore it
80                  */
81                 if (!bus) {
82                         printk(KERN_WARNING
83                         PREFIX "Device scope bus [%d] not found\n",
84                         scope->bus);
85                         break;
86                 }
87                 pdev = pci_get_slot(bus, PCI_DEVFN(path->dev, path->fn));
88                 if (!pdev) {
89                         printk(KERN_WARNING PREFIX
90                         "Device scope device [%04x:%02x:%02x.%02x] not found\n",
91                                 segment, bus->number, path->dev, path->fn);
92                         break;
93                 }
94                 path ++;
95                 count --;
96                 bus = pdev->subordinate;
97         }
98         if (!pdev) {
99                 printk(KERN_WARNING PREFIX
100                 "Device scope device [%04x:%02x:%02x.%02x] not found\n",
101                 segment, scope->bus, path->dev, path->fn);
102                 *dev = NULL;
103                 return 0;
104         }
105         if ((scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT && \
106                         pdev->subordinate) || (scope->entry_type == \
107                         ACPI_DMAR_SCOPE_TYPE_BRIDGE && !pdev->subordinate)) {
108                 pci_dev_put(pdev);
109                 printk(KERN_WARNING PREFIX
110                         "Device scope type does not match for %s\n",
111                          pci_name(pdev));
112                 return -EINVAL;
113         }
114         *dev = pdev;
115         return 0;
116 }
117
118 static int __init dmar_parse_dev_scope(void *start, void *end, int *cnt,
119                                        struct pci_dev ***devices, u16 segment)
120 {
121         struct acpi_dmar_device_scope *scope;
122         void * tmp = start;
123         int index;
124         int ret;
125
126         *cnt = 0;
127         while (start < end) {
128                 scope = start;
129                 if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT ||
130                     scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE)
131                         (*cnt)++;
132                 else
133                         printk(KERN_WARNING PREFIX
134                                 "Unsupported device scope\n");
135                 start += scope->length;
136         }
137         if (*cnt == 0)
138                 return 0;
139
140         *devices = kcalloc(*cnt, sizeof(struct pci_dev *), GFP_KERNEL);
141         if (!*devices)
142                 return -ENOMEM;
143
144         start = tmp;
145         index = 0;
146         while (start < end) {
147                 scope = start;
148                 if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT ||
149                     scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE) {
150                         ret = dmar_parse_one_dev_scope(scope,
151                                 &(*devices)[index], segment);
152                         if (ret) {
153                                 kfree(*devices);
154                                 return ret;
155                         }
156                         index ++;
157                 }
158                 start += scope->length;
159         }
160
161         return 0;
162 }
163
164 /**
165  * dmar_parse_one_drhd - parses exactly one DMA remapping hardware definition
166  * structure which uniquely represent one DMA remapping hardware unit
167  * present in the platform
168  */
169 static int __init
170 dmar_parse_one_drhd(struct acpi_dmar_header *header)
171 {
172         struct acpi_dmar_hardware_unit *drhd;
173         struct dmar_drhd_unit *dmaru;
174         int ret = 0;
175
176         drhd = (struct acpi_dmar_hardware_unit *)header;
177         if (!drhd->address) {
178                 /* Promote an attitude of violence to a BIOS engineer today */
179                 WARN(1, "Your BIOS is broken; DMAR reported at address zero!\n"
180                      "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
181                      dmi_get_system_info(DMI_BIOS_VENDOR),
182                      dmi_get_system_info(DMI_BIOS_VERSION),
183                      dmi_get_system_info(DMI_PRODUCT_VERSION));
184                 return -ENODEV;
185         }
186         dmaru = kzalloc(sizeof(*dmaru), GFP_KERNEL);
187         if (!dmaru)
188                 return -ENOMEM;
189
190         dmaru->hdr = header;
191         dmaru->reg_base_addr = drhd->address;
192         dmaru->segment = drhd->segment;
193         dmaru->include_all = drhd->flags & 0x1; /* BIT0: INCLUDE_ALL */
194
195         ret = alloc_iommu(dmaru);
196         if (ret) {
197                 kfree(dmaru);
198                 return ret;
199         }
200         dmar_register_drhd_unit(dmaru);
201         return 0;
202 }
203
204 static int __init dmar_parse_dev(struct dmar_drhd_unit *dmaru)
205 {
206         struct acpi_dmar_hardware_unit *drhd;
207         int ret = 0;
208
209         drhd = (struct acpi_dmar_hardware_unit *) dmaru->hdr;
210
211         if (dmaru->include_all)
212                 return 0;
213
214         ret = dmar_parse_dev_scope((void *)(drhd + 1),
215                                 ((void *)drhd) + drhd->header.length,
216                                 &dmaru->devices_cnt, &dmaru->devices,
217                                 drhd->segment);
218         if (ret) {
219                 list_del(&dmaru->list);
220                 kfree(dmaru);
221         }
222         return ret;
223 }
224
225 #ifdef CONFIG_DMAR
226 LIST_HEAD(dmar_rmrr_units);
227
228 static void __init dmar_register_rmrr_unit(struct dmar_rmrr_unit *rmrr)
229 {
230         list_add(&rmrr->list, &dmar_rmrr_units);
231 }
232
233
234 static int __init
235 dmar_parse_one_rmrr(struct acpi_dmar_header *header)
236 {
237         struct acpi_dmar_reserved_memory *rmrr;
238         struct dmar_rmrr_unit *rmrru;
239
240         rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
241         if (!rmrru)
242                 return -ENOMEM;
243
244         rmrru->hdr = header;
245         rmrr = (struct acpi_dmar_reserved_memory *)header;
246         rmrru->base_address = rmrr->base_address;
247         rmrru->end_address = rmrr->end_address;
248
249         dmar_register_rmrr_unit(rmrru);
250         return 0;
251 }
252
253 static int __init
254 rmrr_parse_dev(struct dmar_rmrr_unit *rmrru)
255 {
256         struct acpi_dmar_reserved_memory *rmrr;
257         int ret;
258
259         rmrr = (struct acpi_dmar_reserved_memory *) rmrru->hdr;
260         ret = dmar_parse_dev_scope((void *)(rmrr + 1),
261                 ((void *)rmrr) + rmrr->header.length,
262                 &rmrru->devices_cnt, &rmrru->devices, rmrr->segment);
263
264         if (ret || (rmrru->devices_cnt == 0)) {
265                 list_del(&rmrru->list);
266                 kfree(rmrru);
267         }
268         return ret;
269 }
270
271 static LIST_HEAD(dmar_atsr_units);
272
273 static int __init dmar_parse_one_atsr(struct acpi_dmar_header *hdr)
274 {
275         struct acpi_dmar_atsr *atsr;
276         struct dmar_atsr_unit *atsru;
277
278         atsr = container_of(hdr, struct acpi_dmar_atsr, header);
279         atsru = kzalloc(sizeof(*atsru), GFP_KERNEL);
280         if (!atsru)
281                 return -ENOMEM;
282
283         atsru->hdr = hdr;
284         atsru->include_all = atsr->flags & 0x1;
285
286         list_add(&atsru->list, &dmar_atsr_units);
287
288         return 0;
289 }
290
291 static int __init atsr_parse_dev(struct dmar_atsr_unit *atsru)
292 {
293         int rc;
294         struct acpi_dmar_atsr *atsr;
295
296         if (atsru->include_all)
297                 return 0;
298
299         atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
300         rc = dmar_parse_dev_scope((void *)(atsr + 1),
301                                 (void *)atsr + atsr->header.length,
302                                 &atsru->devices_cnt, &atsru->devices,
303                                 atsr->segment);
304         if (rc || !atsru->devices_cnt) {
305                 list_del(&atsru->list);
306                 kfree(atsru);
307         }
308
309         return rc;
310 }
311
312 int dmar_find_matched_atsr_unit(struct pci_dev *dev)
313 {
314         int i;
315         struct pci_bus *bus;
316         struct acpi_dmar_atsr *atsr;
317         struct dmar_atsr_unit *atsru;
318
319         list_for_each_entry(atsru, &dmar_atsr_units, list) {
320                 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
321                 if (atsr->segment == pci_domain_nr(dev->bus))
322                         goto found;
323         }
324
325         return 0;
326
327 found:
328         for (bus = dev->bus; bus; bus = bus->parent) {
329                 struct pci_dev *bridge = bus->self;
330
331                 if (!bridge || !bridge->is_pcie ||
332                     bridge->pcie_type == PCI_EXP_TYPE_PCI_BRIDGE)
333                         return 0;
334
335                 if (bridge->pcie_type == PCI_EXP_TYPE_ROOT_PORT) {
336                         for (i = 0; i < atsru->devices_cnt; i++)
337                                 if (atsru->devices[i] == bridge)
338                                         return 1;
339                         break;
340                 }
341         }
342
343         if (atsru->include_all)
344                 return 1;
345
346         return 0;
347 }
348 #endif
349
350 static void __init
351 dmar_table_print_dmar_entry(struct acpi_dmar_header *header)
352 {
353         struct acpi_dmar_hardware_unit *drhd;
354         struct acpi_dmar_reserved_memory *rmrr;
355         struct acpi_dmar_atsr *atsr;
356
357         switch (header->type) {
358         case ACPI_DMAR_TYPE_HARDWARE_UNIT:
359                 drhd = container_of(header, struct acpi_dmar_hardware_unit,
360                                     header);
361                 printk (KERN_INFO PREFIX
362                         "DRHD base: %#016Lx flags: %#x\n",
363                         (unsigned long long)drhd->address, drhd->flags);
364                 break;
365         case ACPI_DMAR_TYPE_RESERVED_MEMORY:
366                 rmrr = container_of(header, struct acpi_dmar_reserved_memory,
367                                     header);
368                 printk (KERN_INFO PREFIX
369                         "RMRR base: %#016Lx end: %#016Lx\n",
370                         (unsigned long long)rmrr->base_address,
371                         (unsigned long long)rmrr->end_address);
372                 break;
373         case ACPI_DMAR_TYPE_ATSR:
374                 atsr = container_of(header, struct acpi_dmar_atsr, header);
375                 printk(KERN_INFO PREFIX "ATSR flags: %#x\n", atsr->flags);
376                 break;
377         }
378 }
379
380 /**
381  * dmar_table_detect - checks to see if the platform supports DMAR devices
382  */
383 static int __init dmar_table_detect(void)
384 {
385         acpi_status status = AE_OK;
386
387         /* if we could find DMAR table, then there are DMAR devices */
388         status = acpi_get_table_with_size(ACPI_SIG_DMAR, 0,
389                                 (struct acpi_table_header **)&dmar_tbl,
390                                 &dmar_tbl_size);
391
392         if (ACPI_SUCCESS(status) && !dmar_tbl) {
393                 printk (KERN_WARNING PREFIX "Unable to map DMAR\n");
394                 status = AE_NOT_FOUND;
395         }
396
397         return (ACPI_SUCCESS(status) ? 1 : 0);
398 }
399
400 /**
401  * parse_dmar_table - parses the DMA reporting table
402  */
403 static int __init
404 parse_dmar_table(void)
405 {
406         struct acpi_table_dmar *dmar;
407         struct acpi_dmar_header *entry_header;
408         int ret = 0;
409
410         /*
411          * Do it again, earlier dmar_tbl mapping could be mapped with
412          * fixed map.
413          */
414         dmar_table_detect();
415
416         dmar = (struct acpi_table_dmar *)dmar_tbl;
417         if (!dmar)
418                 return -ENODEV;
419
420         if (dmar->width < PAGE_SHIFT - 1) {
421                 printk(KERN_WARNING PREFIX "Invalid DMAR haw\n");
422                 return -EINVAL;
423         }
424
425         printk (KERN_INFO PREFIX "Host address width %d\n",
426                 dmar->width + 1);
427
428         entry_header = (struct acpi_dmar_header *)(dmar + 1);
429         while (((unsigned long)entry_header) <
430                         (((unsigned long)dmar) + dmar_tbl->length)) {
431                 /* Avoid looping forever on bad ACPI tables */
432                 if (entry_header->length == 0) {
433                         printk(KERN_WARNING PREFIX
434                                 "Invalid 0-length structure\n");
435                         ret = -EINVAL;
436                         break;
437                 }
438
439                 dmar_table_print_dmar_entry(entry_header);
440
441                 switch (entry_header->type) {
442                 case ACPI_DMAR_TYPE_HARDWARE_UNIT:
443                         ret = dmar_parse_one_drhd(entry_header);
444                         break;
445                 case ACPI_DMAR_TYPE_RESERVED_MEMORY:
446 #ifdef CONFIG_DMAR
447                         ret = dmar_parse_one_rmrr(entry_header);
448 #endif
449                         break;
450                 case ACPI_DMAR_TYPE_ATSR:
451 #ifdef CONFIG_DMAR
452                         ret = dmar_parse_one_atsr(entry_header);
453 #endif
454                         break;
455                 default:
456                         printk(KERN_WARNING PREFIX
457                                 "Unknown DMAR structure type\n");
458                         ret = 0; /* for forward compatibility */
459                         break;
460                 }
461                 if (ret)
462                         break;
463
464                 entry_header = ((void *)entry_header + entry_header->length);
465         }
466         return ret;
467 }
468
469 int dmar_pci_device_match(struct pci_dev *devices[], int cnt,
470                           struct pci_dev *dev)
471 {
472         int index;
473
474         while (dev) {
475                 for (index = 0; index < cnt; index++)
476                         if (dev == devices[index])
477                                 return 1;
478
479                 /* Check our parent */
480                 dev = dev->bus->self;
481         }
482
483         return 0;
484 }
485
486 struct dmar_drhd_unit *
487 dmar_find_matched_drhd_unit(struct pci_dev *dev)
488 {
489         struct dmar_drhd_unit *dmaru = NULL;
490         struct acpi_dmar_hardware_unit *drhd;
491
492         list_for_each_entry(dmaru, &dmar_drhd_units, list) {
493                 drhd = container_of(dmaru->hdr,
494                                     struct acpi_dmar_hardware_unit,
495                                     header);
496
497                 if (dmaru->include_all &&
498                     drhd->segment == pci_domain_nr(dev->bus))
499                         return dmaru;
500
501                 if (dmar_pci_device_match(dmaru->devices,
502                                           dmaru->devices_cnt, dev))
503                         return dmaru;
504         }
505
506         return NULL;
507 }
508
509 int __init dmar_dev_scope_init(void)
510 {
511         struct dmar_drhd_unit *drhd, *drhd_n;
512         int ret = -ENODEV;
513
514         list_for_each_entry_safe(drhd, drhd_n, &dmar_drhd_units, list) {
515                 ret = dmar_parse_dev(drhd);
516                 if (ret)
517                         return ret;
518         }
519
520 #ifdef CONFIG_DMAR
521         {
522                 struct dmar_rmrr_unit *rmrr, *rmrr_n;
523                 struct dmar_atsr_unit *atsr, *atsr_n;
524
525                 list_for_each_entry_safe(rmrr, rmrr_n, &dmar_rmrr_units, list) {
526                         ret = rmrr_parse_dev(rmrr);
527                         if (ret)
528                                 return ret;
529                 }
530
531                 list_for_each_entry_safe(atsr, atsr_n, &dmar_atsr_units, list) {
532                         ret = atsr_parse_dev(atsr);
533                         if (ret)
534                                 return ret;
535                 }
536         }
537 #endif
538
539         return ret;
540 }
541
542
543 int __init dmar_table_init(void)
544 {
545         static int dmar_table_initialized;
546         int ret;
547
548         if (dmar_table_initialized)
549                 return 0;
550
551         dmar_table_initialized = 1;
552
553         ret = parse_dmar_table();
554         if (ret) {
555                 if (ret != -ENODEV)
556                         printk(KERN_INFO PREFIX "parse DMAR table failure.\n");
557                 return ret;
558         }
559
560         if (list_empty(&dmar_drhd_units)) {
561                 printk(KERN_INFO PREFIX "No DMAR devices found\n");
562                 return -ENODEV;
563         }
564
565 #ifdef CONFIG_DMAR
566         if (list_empty(&dmar_rmrr_units))
567                 printk(KERN_INFO PREFIX "No RMRR found\n");
568
569         if (list_empty(&dmar_atsr_units))
570                 printk(KERN_INFO PREFIX "No ATSR found\n");
571 #endif
572
573 #ifdef CONFIG_INTR_REMAP
574         parse_ioapics_under_ir();
575 #endif
576         return 0;
577 }
578
579 void __init detect_intel_iommu(void)
580 {
581         int ret;
582
583         ret = dmar_table_detect();
584
585         {
586 #ifdef CONFIG_INTR_REMAP
587                 struct acpi_table_dmar *dmar;
588                 /*
589                  * for now we will disable dma-remapping when interrupt
590                  * remapping is enabled.
591                  * When support for queued invalidation for IOTLB invalidation
592                  * is added, we will not need this any more.
593                  */
594                 dmar = (struct acpi_table_dmar *) dmar_tbl;
595                 if (ret && cpu_has_x2apic && dmar->flags & 0x1)
596                         printk(KERN_INFO
597                                "Queued invalidation will be enabled to support "
598                                "x2apic and Intr-remapping.\n");
599 #endif
600 #ifdef CONFIG_DMAR
601                 if (ret && !no_iommu && !iommu_detected && !swiotlb &&
602                     !dmar_disabled)
603                         iommu_detected = 1;
604 #endif
605         }
606         early_acpi_os_unmap_memory(dmar_tbl, dmar_tbl_size);
607         dmar_tbl = NULL;
608 }
609
610
611 int alloc_iommu(struct dmar_drhd_unit *drhd)
612 {
613         struct intel_iommu *iommu;
614         int map_size;
615         u32 ver;
616         static int iommu_allocated = 0;
617         int agaw = 0;
618         int msagaw = 0;
619
620         iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
621         if (!iommu)
622                 return -ENOMEM;
623
624         iommu->seq_id = iommu_allocated++;
625         sprintf (iommu->name, "dmar%d", iommu->seq_id);
626
627         iommu->reg = ioremap(drhd->reg_base_addr, VTD_PAGE_SIZE);
628         if (!iommu->reg) {
629                 printk(KERN_ERR "IOMMU: can't map the region\n");
630                 goto error;
631         }
632         iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
633         iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);
634
635 #ifdef CONFIG_DMAR
636         agaw = iommu_calculate_agaw(iommu);
637         if (agaw < 0) {
638                 printk(KERN_ERR
639                        "Cannot get a valid agaw for iommu (seq_id = %d)\n",
640                        iommu->seq_id);
641                 goto error;
642         }
643         msagaw = iommu_calculate_max_sagaw(iommu);
644         if (msagaw < 0) {
645                 printk(KERN_ERR
646                         "Cannot get a valid max agaw for iommu (seq_id = %d)\n",
647                         iommu->seq_id);
648                 goto error;
649         }
650 #endif
651         iommu->agaw = agaw;
652         iommu->msagaw = msagaw;
653
654         /* the registers might be more than one page */
655         map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
656                 cap_max_fault_reg_offset(iommu->cap));
657         map_size = VTD_PAGE_ALIGN(map_size);
658         if (map_size > VTD_PAGE_SIZE) {
659                 iounmap(iommu->reg);
660                 iommu->reg = ioremap(drhd->reg_base_addr, map_size);
661                 if (!iommu->reg) {
662                         printk(KERN_ERR "IOMMU: can't map the region\n");
663                         goto error;
664                 }
665         }
666
667         ver = readl(iommu->reg + DMAR_VER_REG);
668         pr_debug("IOMMU %llx: ver %d:%d cap %llx ecap %llx\n",
669                 (unsigned long long)drhd->reg_base_addr,
670                 DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver),
671                 (unsigned long long)iommu->cap,
672                 (unsigned long long)iommu->ecap);
673
674         spin_lock_init(&iommu->register_lock);
675
676         drhd->iommu = iommu;
677         return 0;
678 error:
679         kfree(iommu);
680         return -1;
681 }
682
683 void free_iommu(struct intel_iommu *iommu)
684 {
685         if (!iommu)
686                 return;
687
688 #ifdef CONFIG_DMAR
689         free_dmar_iommu(iommu);
690 #endif
691
692         if (iommu->reg)
693                 iounmap(iommu->reg);
694         kfree(iommu);
695 }
696
697 /*
698  * Reclaim all the submitted descriptors which have completed its work.
699  */
700 static inline void reclaim_free_desc(struct q_inval *qi)
701 {
702         while (qi->desc_status[qi->free_tail] == QI_DONE ||
703                qi->desc_status[qi->free_tail] == QI_ABORT) {
704                 qi->desc_status[qi->free_tail] = QI_FREE;
705                 qi->free_tail = (qi->free_tail + 1) % QI_LENGTH;
706                 qi->free_cnt++;
707         }
708 }
709
710 static int qi_check_fault(struct intel_iommu *iommu, int index)
711 {
712         u32 fault;
713         int head, tail;
714         struct q_inval *qi = iommu->qi;
715         int wait_index = (index + 1) % QI_LENGTH;
716
717         if (qi->desc_status[wait_index] == QI_ABORT)
718                 return -EAGAIN;
719
720         fault = readl(iommu->reg + DMAR_FSTS_REG);
721
722         /*
723          * If IQE happens, the head points to the descriptor associated
724          * with the error. No new descriptors are fetched until the IQE
725          * is cleared.
726          */
727         if (fault & DMA_FSTS_IQE) {
728                 head = readl(iommu->reg + DMAR_IQH_REG);
729                 if ((head >> DMAR_IQ_SHIFT) == index) {
730                         printk(KERN_ERR "VT-d detected invalid descriptor: "
731                                 "low=%llx, high=%llx\n",
732                                 (unsigned long long)qi->desc[index].low,
733                                 (unsigned long long)qi->desc[index].high);
734                         memcpy(&qi->desc[index], &qi->desc[wait_index],
735                                         sizeof(struct qi_desc));
736                         __iommu_flush_cache(iommu, &qi->desc[index],
737                                         sizeof(struct qi_desc));
738                         writel(DMA_FSTS_IQE, iommu->reg + DMAR_FSTS_REG);
739                         return -EINVAL;
740                 }
741         }
742
743         /*
744          * If ITE happens, all pending wait_desc commands are aborted.
745          * No new descriptors are fetched until the ITE is cleared.
746          */
747         if (fault & DMA_FSTS_ITE) {
748                 head = readl(iommu->reg + DMAR_IQH_REG);
749                 head = ((head >> DMAR_IQ_SHIFT) - 1 + QI_LENGTH) % QI_LENGTH;
750                 head |= 1;
751                 tail = readl(iommu->reg + DMAR_IQT_REG);
752                 tail = ((tail >> DMAR_IQ_SHIFT) - 1 + QI_LENGTH) % QI_LENGTH;
753
754                 writel(DMA_FSTS_ITE, iommu->reg + DMAR_FSTS_REG);
755
756                 do {
757                         if (qi->desc_status[head] == QI_IN_USE)
758                                 qi->desc_status[head] = QI_ABORT;
759                         head = (head - 2 + QI_LENGTH) % QI_LENGTH;
760                 } while (head != tail);
761
762                 if (qi->desc_status[wait_index] == QI_ABORT)
763                         return -EAGAIN;
764         }
765
766         if (fault & DMA_FSTS_ICE)
767                 writel(DMA_FSTS_ICE, iommu->reg + DMAR_FSTS_REG);
768
769         return 0;
770 }
771
772 /*
773  * Submit the queued invalidation descriptor to the remapping
774  * hardware unit and wait for its completion.
775  */
776 int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu)
777 {
778         int rc;
779         struct q_inval *qi = iommu->qi;
780         struct qi_desc *hw, wait_desc;
781         int wait_index, index;
782         unsigned long flags;
783
784         if (!qi)
785                 return 0;
786
787         hw = qi->desc;
788
789 restart:
790         rc = 0;
791
792         spin_lock_irqsave(&qi->q_lock, flags);
793         while (qi->free_cnt < 3) {
794                 spin_unlock_irqrestore(&qi->q_lock, flags);
795                 cpu_relax();
796                 spin_lock_irqsave(&qi->q_lock, flags);
797         }
798
799         index = qi->free_head;
800         wait_index = (index + 1) % QI_LENGTH;
801
802         qi->desc_status[index] = qi->desc_status[wait_index] = QI_IN_USE;
803
804         hw[index] = *desc;
805
806         wait_desc.low = QI_IWD_STATUS_DATA(QI_DONE) |
807                         QI_IWD_STATUS_WRITE | QI_IWD_TYPE;
808         wait_desc.high = virt_to_phys(&qi->desc_status[wait_index]);
809
810         hw[wait_index] = wait_desc;
811
812         __iommu_flush_cache(iommu, &hw[index], sizeof(struct qi_desc));
813         __iommu_flush_cache(iommu, &hw[wait_index], sizeof(struct qi_desc));
814
815         qi->free_head = (qi->free_head + 2) % QI_LENGTH;
816         qi->free_cnt -= 2;
817
818         /*
819          * update the HW tail register indicating the presence of
820          * new descriptors.
821          */
822         writel(qi->free_head << DMAR_IQ_SHIFT, iommu->reg + DMAR_IQT_REG);
823
824         while (qi->desc_status[wait_index] != QI_DONE) {
825                 /*
826                  * We will leave the interrupts disabled, to prevent interrupt
827                  * context to queue another cmd while a cmd is already submitted
828                  * and waiting for completion on this cpu. This is to avoid
829                  * a deadlock where the interrupt context can wait indefinitely
830                  * for free slots in the queue.
831                  */
832                 rc = qi_check_fault(iommu, index);
833                 if (rc)
834                         break;
835
836                 spin_unlock(&qi->q_lock);
837                 cpu_relax();
838                 spin_lock(&qi->q_lock);
839         }
840
841         qi->desc_status[index] = QI_DONE;
842
843         reclaim_free_desc(qi);
844         spin_unlock_irqrestore(&qi->q_lock, flags);
845
846         if (rc == -EAGAIN)
847                 goto restart;
848
849         return rc;
850 }
851
852 /*
853  * Flush the global interrupt entry cache.
854  */
855 void qi_global_iec(struct intel_iommu *iommu)
856 {
857         struct qi_desc desc;
858
859         desc.low = QI_IEC_TYPE;
860         desc.high = 0;
861
862         /* should never fail */
863         qi_submit_sync(&desc, iommu);
864 }
865
866 void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm,
867                       u64 type)
868 {
869         struct qi_desc desc;
870
871         desc.low = QI_CC_FM(fm) | QI_CC_SID(sid) | QI_CC_DID(did)
872                         | QI_CC_GRAN(type) | QI_CC_TYPE;
873         desc.high = 0;
874
875         qi_submit_sync(&desc, iommu);
876 }
877
878 void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
879                     unsigned int size_order, u64 type)
880 {
881         u8 dw = 0, dr = 0;
882
883         struct qi_desc desc;
884         int ih = 0;
885
886         if (cap_write_drain(iommu->cap))
887                 dw = 1;
888
889         if (cap_read_drain(iommu->cap))
890                 dr = 1;
891
892         desc.low = QI_IOTLB_DID(did) | QI_IOTLB_DR(dr) | QI_IOTLB_DW(dw)
893                 | QI_IOTLB_GRAN(type) | QI_IOTLB_TYPE;
894         desc.high = QI_IOTLB_ADDR(addr) | QI_IOTLB_IH(ih)
895                 | QI_IOTLB_AM(size_order);
896
897         qi_submit_sync(&desc, iommu);
898 }
899
900 void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 qdep,
901                         u64 addr, unsigned mask)
902 {
903         struct qi_desc desc;
904
905         if (mask) {
906                 BUG_ON(addr & ((1 << (VTD_PAGE_SHIFT + mask)) - 1));
907                 addr |= (1 << (VTD_PAGE_SHIFT + mask - 1)) - 1;
908                 desc.high = QI_DEV_IOTLB_ADDR(addr) | QI_DEV_IOTLB_SIZE;
909         } else
910                 desc.high = QI_DEV_IOTLB_ADDR(addr);
911
912         if (qdep >= QI_DEV_IOTLB_MAX_INVS)
913                 qdep = 0;
914
915         desc.low = QI_DEV_IOTLB_SID(sid) | QI_DEV_IOTLB_QDEP(qdep) |
916                    QI_DIOTLB_TYPE;
917
918         qi_submit_sync(&desc, iommu);
919 }
920
921 /*
922  * Disable Queued Invalidation interface.
923  */
924 void dmar_disable_qi(struct intel_iommu *iommu)
925 {
926         unsigned long flags;
927         u32 sts;
928         cycles_t start_time = get_cycles();
929
930         if (!ecap_qis(iommu->ecap))
931                 return;
932
933         spin_lock_irqsave(&iommu->register_lock, flags);
934
935         sts =  dmar_readq(iommu->reg + DMAR_GSTS_REG);
936         if (!(sts & DMA_GSTS_QIES))
937                 goto end;
938
939         /*
940          * Give a chance to HW to complete the pending invalidation requests.
941          */
942         while ((readl(iommu->reg + DMAR_IQT_REG) !=
943                 readl(iommu->reg + DMAR_IQH_REG)) &&
944                 (DMAR_OPERATION_TIMEOUT > (get_cycles() - start_time)))
945                 cpu_relax();
946
947         iommu->gcmd &= ~DMA_GCMD_QIE;
948         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
949
950         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl,
951                       !(sts & DMA_GSTS_QIES), sts);
952 end:
953         spin_unlock_irqrestore(&iommu->register_lock, flags);
954 }
955
956 /*
957  * Enable queued invalidation.
958  */
959 static void __dmar_enable_qi(struct intel_iommu *iommu)
960 {
961         u32 sts;
962         unsigned long flags;
963         struct q_inval *qi = iommu->qi;
964
965         qi->free_head = qi->free_tail = 0;
966         qi->free_cnt = QI_LENGTH;
967
968         spin_lock_irqsave(&iommu->register_lock, flags);
969
970         /* write zero to the tail reg */
971         writel(0, iommu->reg + DMAR_IQT_REG);
972
973         dmar_writeq(iommu->reg + DMAR_IQA_REG, virt_to_phys(qi->desc));
974
975         iommu->gcmd |= DMA_GCMD_QIE;
976         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
977
978         /* Make sure hardware complete it */
979         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, (sts & DMA_GSTS_QIES), sts);
980
981         spin_unlock_irqrestore(&iommu->register_lock, flags);
982 }
983
984 /*
985  * Enable Queued Invalidation interface. This is a must to support
986  * interrupt-remapping. Also used by DMA-remapping, which replaces
987  * register based IOTLB invalidation.
988  */
989 int dmar_enable_qi(struct intel_iommu *iommu)
990 {
991         struct q_inval *qi;
992
993         if (!ecap_qis(iommu->ecap))
994                 return -ENOENT;
995
996         /*
997          * queued invalidation is already setup and enabled.
998          */
999         if (iommu->qi)
1000                 return 0;
1001
1002         iommu->qi = kmalloc(sizeof(*qi), GFP_ATOMIC);
1003         if (!iommu->qi)
1004                 return -ENOMEM;
1005
1006         qi = iommu->qi;
1007
1008         qi->desc = (void *)(get_zeroed_page(GFP_ATOMIC));
1009         if (!qi->desc) {
1010                 kfree(qi);
1011                 iommu->qi = 0;
1012                 return -ENOMEM;
1013         }
1014
1015         qi->desc_status = kmalloc(QI_LENGTH * sizeof(int), GFP_ATOMIC);
1016         if (!qi->desc_status) {
1017                 free_page((unsigned long) qi->desc);
1018                 kfree(qi);
1019                 iommu->qi = 0;
1020                 return -ENOMEM;
1021         }
1022
1023         qi->free_head = qi->free_tail = 0;
1024         qi->free_cnt = QI_LENGTH;
1025
1026         spin_lock_init(&qi->q_lock);
1027
1028         __dmar_enable_qi(iommu);
1029
1030         return 0;
1031 }
1032
1033 /* iommu interrupt handling. Most stuff are MSI-like. */
1034
1035 enum faulttype {
1036         DMA_REMAP,
1037         INTR_REMAP,
1038         UNKNOWN,
1039 };
1040
1041 static const char *dma_remap_fault_reasons[] =
1042 {
1043         "Software",
1044         "Present bit in root entry is clear",
1045         "Present bit in context entry is clear",
1046         "Invalid context entry",
1047         "Access beyond MGAW",
1048         "PTE Write access is not set",
1049         "PTE Read access is not set",
1050         "Next page table ptr is invalid",
1051         "Root table address invalid",
1052         "Context table ptr is invalid",
1053         "non-zero reserved fields in RTP",
1054         "non-zero reserved fields in CTP",
1055         "non-zero reserved fields in PTE",
1056 };
1057
1058 static const char *intr_remap_fault_reasons[] =
1059 {
1060         "Detected reserved fields in the decoded interrupt-remapped request",
1061         "Interrupt index exceeded the interrupt-remapping table size",
1062         "Present field in the IRTE entry is clear",
1063         "Error accessing interrupt-remapping table pointed by IRTA_REG",
1064         "Detected reserved fields in the IRTE entry",
1065         "Blocked a compatibility format interrupt request",
1066         "Blocked an interrupt request due to source-id verification failure",
1067 };
1068
1069 #define MAX_FAULT_REASON_IDX    (ARRAY_SIZE(fault_reason_strings) - 1)
1070
1071 const char *dmar_get_fault_reason(u8 fault_reason, int *fault_type)
1072 {
1073         if (fault_reason >= 0x20 && (fault_reason <= 0x20 +
1074                                      ARRAY_SIZE(intr_remap_fault_reasons))) {
1075                 *fault_type = INTR_REMAP;
1076                 return intr_remap_fault_reasons[fault_reason - 0x20];
1077         } else if (fault_reason < ARRAY_SIZE(dma_remap_fault_reasons)) {
1078                 *fault_type = DMA_REMAP;
1079                 return dma_remap_fault_reasons[fault_reason];
1080         } else {
1081                 *fault_type = UNKNOWN;
1082                 return "Unknown";
1083         }
1084 }
1085
1086 void dmar_msi_unmask(unsigned int irq)
1087 {
1088         struct intel_iommu *iommu = get_irq_data(irq);
1089         unsigned long flag;
1090
1091         /* unmask it */
1092         spin_lock_irqsave(&iommu->register_lock, flag);
1093         writel(0, iommu->reg + DMAR_FECTL_REG);
1094         /* Read a reg to force flush the post write */
1095         readl(iommu->reg + DMAR_FECTL_REG);
1096         spin_unlock_irqrestore(&iommu->register_lock, flag);
1097 }
1098
1099 void dmar_msi_mask(unsigned int irq)
1100 {
1101         unsigned long flag;
1102         struct intel_iommu *iommu = get_irq_data(irq);
1103
1104         /* mask it */
1105         spin_lock_irqsave(&iommu->register_lock, flag);
1106         writel(DMA_FECTL_IM, iommu->reg + DMAR_FECTL_REG);
1107         /* Read a reg to force flush the post write */
1108         readl(iommu->reg + DMAR_FECTL_REG);
1109         spin_unlock_irqrestore(&iommu->register_lock, flag);
1110 }
1111
1112 void dmar_msi_write(int irq, struct msi_msg *msg)
1113 {
1114         struct intel_iommu *iommu = get_irq_data(irq);
1115         unsigned long flag;
1116
1117         spin_lock_irqsave(&iommu->register_lock, flag);
1118         writel(msg->data, iommu->reg + DMAR_FEDATA_REG);
1119         writel(msg->address_lo, iommu->reg + DMAR_FEADDR_REG);
1120         writel(msg->address_hi, iommu->reg + DMAR_FEUADDR_REG);
1121         spin_unlock_irqrestore(&iommu->register_lock, flag);
1122 }
1123
1124 void dmar_msi_read(int irq, struct msi_msg *msg)
1125 {
1126         struct intel_iommu *iommu = get_irq_data(irq);
1127         unsigned long flag;
1128
1129         spin_lock_irqsave(&iommu->register_lock, flag);
1130         msg->data = readl(iommu->reg + DMAR_FEDATA_REG);
1131         msg->address_lo = readl(iommu->reg + DMAR_FEADDR_REG);
1132         msg->address_hi = readl(iommu->reg + DMAR_FEUADDR_REG);
1133         spin_unlock_irqrestore(&iommu->register_lock, flag);
1134 }
1135
1136 static int dmar_fault_do_one(struct intel_iommu *iommu, int type,
1137                 u8 fault_reason, u16 source_id, unsigned long long addr)
1138 {
1139         const char *reason;
1140         int fault_type;
1141
1142         reason = dmar_get_fault_reason(fault_reason, &fault_type);
1143
1144         if (fault_type == INTR_REMAP)
1145                 printk(KERN_ERR "INTR-REMAP: Request device [[%02x:%02x.%d] "
1146                        "fault index %llx\n"
1147                         "INTR-REMAP:[fault reason %02d] %s\n",
1148                         (source_id >> 8), PCI_SLOT(source_id & 0xFF),
1149                         PCI_FUNC(source_id & 0xFF), addr >> 48,
1150                         fault_reason, reason);
1151         else
1152                 printk(KERN_ERR
1153                        "DMAR:[%s] Request device [%02x:%02x.%d] "
1154                        "fault addr %llx \n"
1155                        "DMAR:[fault reason %02d] %s\n",
1156                        (type ? "DMA Read" : "DMA Write"),
1157                        (source_id >> 8), PCI_SLOT(source_id & 0xFF),
1158                        PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason);
1159         return 0;
1160 }
1161
1162 #define PRIMARY_FAULT_REG_LEN (16)
1163 irqreturn_t dmar_fault(int irq, void *dev_id)
1164 {
1165         struct intel_iommu *iommu = dev_id;
1166         int reg, fault_index;
1167         u32 fault_status;
1168         unsigned long flag;
1169
1170         spin_lock_irqsave(&iommu->register_lock, flag);
1171         fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1172         if (fault_status)
1173                 printk(KERN_ERR "DRHD: handling fault status reg %x\n",
1174                        fault_status);
1175
1176         /* TBD: ignore advanced fault log currently */
1177         if (!(fault_status & DMA_FSTS_PPF))
1178                 goto clear_rest;
1179
1180         fault_index = dma_fsts_fault_record_index(fault_status);
1181         reg = cap_fault_reg_offset(iommu->cap);
1182         while (1) {
1183                 u8 fault_reason;
1184                 u16 source_id;
1185                 u64 guest_addr;
1186                 int type;
1187                 u32 data;
1188
1189                 /* highest 32 bits */
1190                 data = readl(iommu->reg + reg +
1191                                 fault_index * PRIMARY_FAULT_REG_LEN + 12);
1192                 if (!(data & DMA_FRCD_F))
1193                         break;
1194
1195                 fault_reason = dma_frcd_fault_reason(data);
1196                 type = dma_frcd_type(data);
1197
1198                 data = readl(iommu->reg + reg +
1199                                 fault_index * PRIMARY_FAULT_REG_LEN + 8);
1200                 source_id = dma_frcd_source_id(data);
1201
1202                 guest_addr = dmar_readq(iommu->reg + reg +
1203                                 fault_index * PRIMARY_FAULT_REG_LEN);
1204                 guest_addr = dma_frcd_page_addr(guest_addr);
1205                 /* clear the fault */
1206                 writel(DMA_FRCD_F, iommu->reg + reg +
1207                         fault_index * PRIMARY_FAULT_REG_LEN + 12);
1208
1209                 spin_unlock_irqrestore(&iommu->register_lock, flag);
1210
1211                 dmar_fault_do_one(iommu, type, fault_reason,
1212                                 source_id, guest_addr);
1213
1214                 fault_index++;
1215                 if (fault_index > cap_num_fault_regs(iommu->cap))
1216                         fault_index = 0;
1217                 spin_lock_irqsave(&iommu->register_lock, flag);
1218         }
1219 clear_rest:
1220         /* clear all the other faults */
1221         fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1222         writel(fault_status, iommu->reg + DMAR_FSTS_REG);
1223
1224         spin_unlock_irqrestore(&iommu->register_lock, flag);
1225         return IRQ_HANDLED;
1226 }
1227
1228 int dmar_set_interrupt(struct intel_iommu *iommu)
1229 {
1230         int irq, ret;
1231
1232         /*
1233          * Check if the fault interrupt is already initialized.
1234          */
1235         if (iommu->irq)
1236                 return 0;
1237
1238         irq = create_irq();
1239         if (!irq) {
1240                 printk(KERN_ERR "IOMMU: no free vectors\n");
1241                 return -EINVAL;
1242         }
1243
1244         set_irq_data(irq, iommu);
1245         iommu->irq = irq;
1246
1247         ret = arch_setup_dmar_msi(irq);
1248         if (ret) {
1249                 set_irq_data(irq, NULL);
1250                 iommu->irq = 0;
1251                 destroy_irq(irq);
1252                 return ret;
1253         }
1254
1255         ret = request_irq(irq, dmar_fault, 0, iommu->name, iommu);
1256         if (ret)
1257                 printk(KERN_ERR "IOMMU: can't request irq\n");
1258         return ret;
1259 }
1260
1261 int __init enable_drhd_fault_handling(void)
1262 {
1263         struct dmar_drhd_unit *drhd;
1264
1265         /*
1266          * Enable fault control interrupt.
1267          */
1268         for_each_drhd_unit(drhd) {
1269                 int ret;
1270                 struct intel_iommu *iommu = drhd->iommu;
1271                 ret = dmar_set_interrupt(iommu);
1272
1273                 if (ret) {
1274                         printk(KERN_ERR "DRHD %Lx: failed to enable fault, "
1275                                " interrupt, ret %d\n",
1276                                (unsigned long long)drhd->reg_base_addr, ret);
1277                         return -1;
1278                 }
1279         }
1280
1281         return 0;
1282 }
1283
1284 /*
1285  * Re-enable Queued Invalidation interface.
1286  */
1287 int dmar_reenable_qi(struct intel_iommu *iommu)
1288 {
1289         if (!ecap_qis(iommu->ecap))
1290                 return -ENOENT;
1291
1292         if (!iommu->qi)
1293                 return -ENOENT;
1294
1295         /*
1296          * First disable queued invalidation.
1297          */
1298         dmar_disable_qi(iommu);
1299         /*
1300          * Then enable queued invalidation again. Since there is no pending
1301          * invalidation requests now, it's safe to re-enable queued
1302          * invalidation.
1303          */
1304         __dmar_enable_qi(iommu);
1305
1306         return 0;
1307 }