iommu: Move IOMMU specific code to intel-iommu.c
drivers/iommu/dmar.c (linux-2.6.git)
/*
 * Copyright (c) 2006, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 * Place - Suite 330, Boston, MA 02111-1307 USA.
 *
 * Copyright (C) 2006-2008 Intel Corporation
 * Author: Ashok Raj <ashok.raj@intel.com>
 * Author: Shaohua Li <shaohua.li@intel.com>
 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
 *
 * This file implements early detection/parsing of Remapping Devices
 * reported to OS through BIOS via DMA remapping reporting (DMAR) ACPI
 * tables.
 *
 * These routines are used by both DMA-remapping and Interrupt-remapping
 */

#include <linux/pci.h>
#include <linux/dmar.h>
#include <linux/iova.h>
#include <linux/intel-iommu.h>
#include <linux/timer.h>
#include <linux/irq.h>
#include <linux/interrupt.h>
#include <linux/tboot.h>
#include <linux/dmi.h>
#include <linux/slab.h>
#include <asm/iommu_table.h>

#define PREFIX "DMAR: "

/* No locks are needed as the DMA remapping hardware unit
 * list is constructed at boot time and hotplug of
 * these units is not supported by the architecture.
 */
LIST_HEAD(dmar_drhd_units);

struct acpi_table_header * __initdata dmar_tbl;
static acpi_size dmar_tbl_size;

static void __init dmar_register_drhd_unit(struct dmar_drhd_unit *drhd)
{
        /*
         * add INCLUDE_ALL at the tail, so a scan of the list will
         * find it at the very end.
         */
        if (drhd->include_all)
                list_add_tail(&drhd->list, &dmar_drhd_units);
        else
                list_add(&drhd->list, &dmar_drhd_units);
}

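/*
 * Walk one ACPI device-scope entry (a start bus number followed by a
 * {device, function} path) and return the pci_dev it resolves to.
 * Returns 0 with *dev == NULL when the BIOS names a device that does
 * not exist, and -EINVAL when the entry type does not match the device
 * that was found.
 */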
static int __init dmar_parse_one_dev_scope(struct acpi_dmar_device_scope *scope,
                                           struct pci_dev **dev, u16 segment)
{
        struct pci_bus *bus;
        struct pci_dev *pdev = NULL;
        struct acpi_dmar_pci_path *path;
        int count;

        bus = pci_find_bus(segment, scope->bus);
        path = (struct acpi_dmar_pci_path *)(scope + 1);
        count = (scope->length - sizeof(struct acpi_dmar_device_scope))
                / sizeof(struct acpi_dmar_pci_path);

        while (count) {
                if (pdev)
                        pci_dev_put(pdev);
                /*
                 * Some BIOSes list non-existent devices in the DMAR table;
                 * just ignore them.
                 */
                if (!bus) {
                        printk(KERN_WARNING
                        PREFIX "Device scope bus [%d] not found\n",
                        scope->bus);
                        break;
                }
                pdev = pci_get_slot(bus, PCI_DEVFN(path->dev, path->fn));
                if (!pdev) {
                        printk(KERN_WARNING PREFIX
                        "Device scope device [%04x:%02x:%02x.%02x] not found\n",
                                segment, bus->number, path->dev, path->fn);
                        break;
                }
                path++;
                count--;
                bus = pdev->subordinate;
        }
        if (!pdev) {
                printk(KERN_WARNING PREFIX
                "Device scope device [%04x:%02x:%02x.%02x] not found\n",
                segment, scope->bus, path->dev, path->fn);
                *dev = NULL;
                return 0;
        }
        if ((scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT &&
             pdev->subordinate) ||
            (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE &&
             !pdev->subordinate)) {
                printk(KERN_WARNING PREFIX
                        "Device scope type does not match for %s\n",
                         pci_name(pdev));
                pci_dev_put(pdev);
                return -EINVAL;
        }
        *dev = pdev;
        return 0;
}

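/*
 * Parse all device-scope entries between @start and @end into an array
 * of pci_dev pointers.  Only endpoint and bridge scopes are counted;
 * IOAPIC scopes are skipped without a warning.  @cnt and @devices are
 * filled in for the caller (DRHD, RMRR and ATSR parsing).
 */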
int __init dmar_parse_dev_scope(void *start, void *end, int *cnt,
                                struct pci_dev ***devices, u16 segment)
{
        struct acpi_dmar_device_scope *scope;
        void *tmp = start;
        int index;
        int ret;

        *cnt = 0;
        while (start < end) {
                scope = start;
                if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT ||
                    scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE)
                        (*cnt)++;
                else if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_IOAPIC) {
                        printk(KERN_WARNING PREFIX
                               "Unsupported device scope\n");
                }
                start += scope->length;
        }
        if (*cnt == 0)
                return 0;

        *devices = kcalloc(*cnt, sizeof(struct pci_dev *), GFP_KERNEL);
        if (!*devices)
                return -ENOMEM;

        start = tmp;
        index = 0;
        while (start < end) {
                scope = start;
                if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT ||
                    scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE) {
                        ret = dmar_parse_one_dev_scope(scope,
                                &(*devices)[index], segment);
                        if (ret) {
                                kfree(*devices);
                                return ret;
                        }
                        index++;
                }
                start += scope->length;
        }

        return 0;
}

/**
 * dmar_parse_one_drhd - parses exactly one DMA remapping hardware definition
 * structure which uniquely represents one DMA remapping hardware unit
 * present in the platform
 */
static int __init
dmar_parse_one_drhd(struct acpi_dmar_header *header)
{
        struct acpi_dmar_hardware_unit *drhd;
        struct dmar_drhd_unit *dmaru;
        int ret = 0;

        drhd = (struct acpi_dmar_hardware_unit *)header;
        dmaru = kzalloc(sizeof(*dmaru), GFP_KERNEL);
        if (!dmaru)
                return -ENOMEM;

        dmaru->hdr = header;
        dmaru->reg_base_addr = drhd->address;
        dmaru->segment = drhd->segment;
        dmaru->include_all = drhd->flags & 0x1; /* BIT0: INCLUDE_ALL */

        ret = alloc_iommu(dmaru);
        if (ret) {
                kfree(dmaru);
                return ret;
        }
        dmar_register_drhd_unit(dmaru);
        return 0;
}

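/*
 * Late (post-PCI) device-scope parsing for one DRHD.  INCLUDE_ALL units
 * have no explicit scope; for all others the unit is dropped from the
 * list if its scope cannot be parsed.
 */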
static int __init dmar_parse_dev(struct dmar_drhd_unit *dmaru)
{
        struct acpi_dmar_hardware_unit *drhd;
        int ret = 0;

        drhd = (struct acpi_dmar_hardware_unit *) dmaru->hdr;

        if (dmaru->include_all)
                return 0;

        ret = dmar_parse_dev_scope((void *)(drhd + 1),
                                ((void *)drhd) + drhd->header.length,
                                &dmaru->devices_cnt, &dmaru->devices,
                                drhd->segment);
        if (ret) {
                list_del(&dmaru->list);
                kfree(dmaru);
        }
        return ret;
}

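/*
 * RHSA structures associate a DRHD with a NUMA proximity domain.  Record
 * the node in the matching iommu, or warn (and taint) if the RHSA points
 * at a register base that no DRHD claimed.
 */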
#ifdef CONFIG_ACPI_NUMA
static int __init
dmar_parse_one_rhsa(struct acpi_dmar_header *header)
{
        struct acpi_dmar_rhsa *rhsa;
        struct dmar_drhd_unit *drhd;

        rhsa = (struct acpi_dmar_rhsa *)header;
        for_each_drhd_unit(drhd) {
                if (drhd->reg_base_addr == rhsa->base_address) {
                        int node = acpi_map_pxm_to_node(rhsa->proximity_domain);

                        if (!node_online(node))
                                node = -1;
                        drhd->iommu->node = node;
                        return 0;
                }
        }
        WARN_TAINT(
                1, TAINT_FIRMWARE_WORKAROUND,
                "Your BIOS is broken; RHSA refers to non-existent DMAR unit at %llx\n"
                "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
                rhsa->base_address,
                dmi_get_system_info(DMI_BIOS_VENDOR),
                dmi_get_system_info(DMI_BIOS_VERSION),
                dmi_get_system_info(DMI_PRODUCT_VERSION));

        return 0;
}
#endif

static void __init
dmar_table_print_dmar_entry(struct acpi_dmar_header *header)
{
        struct acpi_dmar_hardware_unit *drhd;
        struct acpi_dmar_reserved_memory *rmrr;
        struct acpi_dmar_atsr *atsr;
        struct acpi_dmar_rhsa *rhsa;

        switch (header->type) {
        case ACPI_DMAR_TYPE_HARDWARE_UNIT:
                drhd = container_of(header, struct acpi_dmar_hardware_unit,
                                    header);
                printk(KERN_INFO PREFIX
                        "DRHD base: %#016Lx flags: %#x\n",
                        (unsigned long long)drhd->address, drhd->flags);
                break;
        case ACPI_DMAR_TYPE_RESERVED_MEMORY:
                rmrr = container_of(header, struct acpi_dmar_reserved_memory,
                                    header);
                printk(KERN_INFO PREFIX
                        "RMRR base: %#016Lx end: %#016Lx\n",
                        (unsigned long long)rmrr->base_address,
                        (unsigned long long)rmrr->end_address);
                break;
        case ACPI_DMAR_TYPE_ATSR:
                atsr = container_of(header, struct acpi_dmar_atsr, header);
                printk(KERN_INFO PREFIX "ATSR flags: %#x\n", atsr->flags);
                break;
        case ACPI_DMAR_HARDWARE_AFFINITY:
                rhsa = container_of(header, struct acpi_dmar_rhsa, header);
                printk(KERN_INFO PREFIX "RHSA base: %#016Lx proximity domain: %#x\n",
                       (unsigned long long)rhsa->base_address,
                       rhsa->proximity_domain);
                break;
        }
}

/**
 * dmar_table_detect - checks to see if the platform supports DMAR devices
 */
static int __init dmar_table_detect(void)
{
        acpi_status status = AE_OK;

        /* if we can find the DMAR table, then there are DMAR devices */
        status = acpi_get_table_with_size(ACPI_SIG_DMAR, 0,
                                (struct acpi_table_header **)&dmar_tbl,
                                &dmar_tbl_size);

        if (ACPI_SUCCESS(status) && !dmar_tbl) {
                printk(KERN_WARNING PREFIX "Unable to map DMAR\n");
                status = AE_NOT_FOUND;
        }

        return (ACPI_SUCCESS(status) ? 1 : 0);
}

/**
 * parse_dmar_table - parses the DMA reporting table
 */
static int __init
parse_dmar_table(void)
{
        struct acpi_table_dmar *dmar;
        struct acpi_dmar_header *entry_header;
        int ret = 0;

        /*
         * Do it again; the earlier dmar_tbl mapping may have used the
         * fixed map.
         */
        dmar_table_detect();

        /*
         * ACPI tables may not be DMA protected by tboot, so use DMAR copy
         * SINIT saved in SinitMleData in TXT heap (which is DMA protected)
         */
        dmar_tbl = tboot_get_dmar_table(dmar_tbl);

        dmar = (struct acpi_table_dmar *)dmar_tbl;
        if (!dmar)
                return -ENODEV;

        if (dmar->width < PAGE_SHIFT - 1) {
                printk(KERN_WARNING PREFIX "Invalid DMAR haw\n");
                return -EINVAL;
        }

        printk(KERN_INFO PREFIX "Host address width %d\n",
                dmar->width + 1);

        entry_header = (struct acpi_dmar_header *)(dmar + 1);
        while (((unsigned long)entry_header) <
                        (((unsigned long)dmar) + dmar_tbl->length)) {
                /* Avoid looping forever on bad ACPI tables */
                if (entry_header->length == 0) {
                        printk(KERN_WARNING PREFIX
                                "Invalid 0-length structure\n");
                        ret = -EINVAL;
                        break;
                }

                dmar_table_print_dmar_entry(entry_header);

                switch (entry_header->type) {
                case ACPI_DMAR_TYPE_HARDWARE_UNIT:
                        ret = dmar_parse_one_drhd(entry_header);
                        break;
                case ACPI_DMAR_TYPE_RESERVED_MEMORY:
                        ret = dmar_parse_one_rmrr(entry_header);
                        break;
                case ACPI_DMAR_TYPE_ATSR:
                        ret = dmar_parse_one_atsr(entry_header);
                        break;
                case ACPI_DMAR_HARDWARE_AFFINITY:
#ifdef CONFIG_ACPI_NUMA
                        ret = dmar_parse_one_rhsa(entry_header);
#endif
                        break;
                default:
                        printk(KERN_WARNING PREFIX
                                "Unknown DMAR structure type %d\n",
                                entry_header->type);
                        ret = 0; /* for forward compatibility */
                        break;
                }
                if (ret)
                        break;

                entry_header = ((void *)entry_header + entry_header->length);
        }
        return ret;
}

static int dmar_pci_device_match(struct pci_dev *devices[], int cnt,
                          struct pci_dev *dev)
{
        int index;

        while (dev) {
                for (index = 0; index < cnt; index++)
                        if (dev == devices[index])
                                return 1;

                /* Check our parent */
                dev = dev->bus->self;
        }

        return 0;
}

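/*
 * Find the DRHD unit that covers @dev: either a unit that lists the
 * device (or one of its parent bridges) in its scope, or the segment's
 * INCLUDE_ALL unit, which is deliberately kept at the tail of the list.
 */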
struct dmar_drhd_unit *
dmar_find_matched_drhd_unit(struct pci_dev *dev)
{
        struct dmar_drhd_unit *dmaru = NULL;
        struct acpi_dmar_hardware_unit *drhd;

        dev = pci_physfn(dev);

        list_for_each_entry(dmaru, &dmar_drhd_units, list) {
                drhd = container_of(dmaru->hdr,
                                    struct acpi_dmar_hardware_unit,
                                    header);

                if (dmaru->include_all &&
                    drhd->segment == pci_domain_nr(dev->bus))
                        return dmaru;

                if (dmar_pci_device_match(dmaru->devices,
                                          dmaru->devices_cnt, dev))
                        return dmaru;
        }

        return NULL;
}

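/*
 * Resolve the device scopes of all DRHD, RMRR and ATSR structures to
 * pci_dev pointers.  Must run after the PCI subsystem is initialized;
 * the result (1 on success, a negative errno on failure) is cached so
 * repeated callers get the same answer.
 */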
int __init dmar_dev_scope_init(void)
{
        static int dmar_dev_scope_initialized;
        struct dmar_drhd_unit *drhd, *drhd_n;
        int ret = -ENODEV;

        if (dmar_dev_scope_initialized)
                return dmar_dev_scope_initialized;

        if (list_empty(&dmar_drhd_units))
                goto fail;

        list_for_each_entry_safe(drhd, drhd_n, &dmar_drhd_units, list) {
                ret = dmar_parse_dev(drhd);
                if (ret)
                        goto fail;
        }

        ret = dmar_parse_rmrr_atsr_dev();
        if (ret)
                goto fail;

        dmar_dev_scope_initialized = 1;
        return 0;

fail:
        dmar_dev_scope_initialized = ret;
        return ret;
}

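/*
 * Parse the DMAR table once and build the list of DRHD units.
 * Returns -ENODEV if the table is absent or lists no DRHD units,
 * or another negative errno on a parse failure.
 */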
int __init dmar_table_init(void)
{
        static int dmar_table_initialized;
        int ret;

        if (dmar_table_initialized)
                return 0;

        dmar_table_initialized = 1;

        ret = parse_dmar_table();
        if (ret) {
                if (ret != -ENODEV)
                        printk(KERN_INFO PREFIX "parse DMAR table failure.\n");
                return ret;
        }

        if (list_empty(&dmar_drhd_units)) {
                printk(KERN_INFO PREFIX "No DMAR devices found\n");
                return -ENODEV;
        }

        return 0;
}

static void warn_invalid_dmar(u64 addr, const char *message)
{
        WARN_TAINT_ONCE(
                1, TAINT_FIRMWARE_WORKAROUND,
                "Your BIOS is broken; DMAR reported at address %llx%s!\n"
                "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
                addr, message,
                dmi_get_system_info(DMI_BIOS_VENDOR),
                dmi_get_system_info(DMI_BIOS_VERSION),
                dmi_get_system_info(DMI_PRODUCT_VERSION));
}

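/*
 * Sanity-check every DRHD in the table before committing to use it:
 * a zero register base, or a unit whose CAP/ECAP registers read back
 * as all ones, means the BIOS handed us garbage, in which case DMA
 * remapping is disabled.
 */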
int __init check_zero_address(void)
{
        struct acpi_table_dmar *dmar;
        struct acpi_dmar_header *entry_header;
        struct acpi_dmar_hardware_unit *drhd;

        dmar = (struct acpi_table_dmar *)dmar_tbl;
        entry_header = (struct acpi_dmar_header *)(dmar + 1);

        while (((unsigned long)entry_header) <
                        (((unsigned long)dmar) + dmar_tbl->length)) {
                /* Avoid looping forever on bad ACPI tables */
                if (entry_header->length == 0) {
                        printk(KERN_WARNING PREFIX
                                "Invalid 0-length structure\n");
                        return 0;
                }

                if (entry_header->type == ACPI_DMAR_TYPE_HARDWARE_UNIT) {
                        void __iomem *addr;
                        u64 cap, ecap;

                        drhd = (void *)entry_header;
                        if (!drhd->address) {
                                warn_invalid_dmar(0, "");
                                goto failed;
                        }

                        addr = early_ioremap(drhd->address, VTD_PAGE_SIZE);
                        if (!addr) {
                                printk(KERN_ERR "IOMMU: can't validate: %llx\n",
                                       drhd->address);
                                goto failed;
                        }
                        cap = dmar_readq(addr + DMAR_CAP_REG);
                        ecap = dmar_readq(addr + DMAR_ECAP_REG);
                        early_iounmap(addr, VTD_PAGE_SIZE);
                        if (cap == (uint64_t)-1 && ecap == (uint64_t)-1) {
                                warn_invalid_dmar(drhd->address,
                                                  " returns all ones");
                                goto failed;
                        }
                }

                entry_header = ((void *)entry_header + entry_header->length);
        }
        return 1;

failed:
#ifdef CONFIG_DMAR
        dmar_disabled = 1;
#endif
        return 0;
}

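/*
 * Early detection hook (run via IOMMU_INIT_POST): map the DMAR table,
 * validate it, and if it looks usable wire up intel_iommu_init as the
 * x86 IOMMU init routine.  The early table mapping is dropped again
 * before returning.
 */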
int __init detect_intel_iommu(void)
{
        int ret;

        ret = dmar_table_detect();
        if (ret)
                ret = check_zero_address();
        {
#ifdef CONFIG_INTR_REMAP
                struct acpi_table_dmar *dmar;

                dmar = (struct acpi_table_dmar *) dmar_tbl;
                if (ret && cpu_has_x2apic && dmar->flags & 0x1)
                        printk(KERN_INFO
                               "Queued invalidation will be enabled to support "
                               "x2apic and Intr-remapping.\n");
#endif
#ifdef CONFIG_DMAR
                if (ret && !no_iommu && !iommu_detected && !dmar_disabled) {
                        iommu_detected = 1;
                        /* Make sure ACS will be enabled */
                        pci_request_acs();
                }
#endif
#ifdef CONFIG_X86
                if (ret)
                        x86_init.iommu.iommu_init = intel_iommu_init;
#endif
        }
        early_acpi_os_unmap_memory(dmar_tbl, dmar_tbl_size);
        dmar_tbl = NULL;

        return ret ? 1 : -ENODEV;
}

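/*
 * Map a DRHD's register window, read its capabilities and allocate the
 * struct intel_iommu that the rest of the driver works with.  The
 * initial VTD_PAGE_SIZE mapping is grown if the IOTLB or fault-recording
 * registers live beyond the first page.
 */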
int alloc_iommu(struct dmar_drhd_unit *drhd)
{
        struct intel_iommu *iommu;
        int map_size;
        u32 ver;
        static int iommu_allocated = 0;
        int agaw = 0;
        int msagaw = 0;

        if (!drhd->reg_base_addr) {
                warn_invalid_dmar(0, "");
                return -EINVAL;
        }

        iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
        if (!iommu)
                return -ENOMEM;

        iommu->seq_id = iommu_allocated++;
        sprintf(iommu->name, "dmar%d", iommu->seq_id);

        iommu->reg = ioremap(drhd->reg_base_addr, VTD_PAGE_SIZE);
        if (!iommu->reg) {
                printk(KERN_ERR "IOMMU: can't map the region\n");
                goto error;
        }
        iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
        iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);

        if (iommu->cap == (uint64_t)-1 && iommu->ecap == (uint64_t)-1) {
                warn_invalid_dmar(drhd->reg_base_addr, " returns all ones");
                goto err_unmap;
        }

        agaw = iommu_calculate_agaw(iommu);
        if (agaw < 0) {
                printk(KERN_ERR
                       "Cannot get a valid agaw for iommu (seq_id = %d)\n",
                       iommu->seq_id);
                goto err_unmap;
        }
        msagaw = iommu_calculate_max_sagaw(iommu);
        if (msagaw < 0) {
                printk(KERN_ERR
                        "Cannot get a valid max agaw for iommu (seq_id = %d)\n",
                        iommu->seq_id);
                goto err_unmap;
        }
        iommu->agaw = agaw;
        iommu->msagaw = msagaw;

        iommu->node = -1;

        /* the registers might be more than one page */
        map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
                cap_max_fault_reg_offset(iommu->cap));
        map_size = VTD_PAGE_ALIGN(map_size);
        if (map_size > VTD_PAGE_SIZE) {
                iounmap(iommu->reg);
                iommu->reg = ioremap(drhd->reg_base_addr, map_size);
                if (!iommu->reg) {
                        printk(KERN_ERR "IOMMU: can't map the region\n");
                        goto error;
                }
        }

        ver = readl(iommu->reg + DMAR_VER_REG);
        pr_info("IOMMU %d: reg_base_addr %llx ver %d:%d cap %llx ecap %llx\n",
                iommu->seq_id,
                (unsigned long long)drhd->reg_base_addr,
                DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver),
                (unsigned long long)iommu->cap,
                (unsigned long long)iommu->ecap);

        raw_spin_lock_init(&iommu->register_lock);

        drhd->iommu = iommu;
        return 0;

 err_unmap:
        iounmap(iommu->reg);
 error:
        kfree(iommu);
        return -1;
}

void free_iommu(struct intel_iommu *iommu)
{
        if (!iommu)
                return;

        free_dmar_iommu(iommu);

        if (iommu->reg)
                iounmap(iommu->reg);
        kfree(iommu);
}

/*
 * Reclaim all the submitted descriptors which have completed their work.
 */
static inline void reclaim_free_desc(struct q_inval *qi)
{
        while (qi->desc_status[qi->free_tail] == QI_DONE ||
               qi->desc_status[qi->free_tail] == QI_ABORT) {
                qi->desc_status[qi->free_tail] = QI_FREE;
                qi->free_tail = (qi->free_tail + 1) % QI_LENGTH;
                qi->free_cnt++;
        }
}

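/*
 * Check the fault status register while waiting for a queued-invalidation
 * wait descriptor to complete.  An Invalidation Queue Error (IQE) on our
 * own descriptor is turned into -EINVAL; an Invalidation Time-out Error
 * (ITE) aborts the pending wait descriptors and returns -EAGAIN so the
 * caller can resubmit.
 */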
static int qi_check_fault(struct intel_iommu *iommu, int index)
{
        u32 fault;
        int head, tail;
        struct q_inval *qi = iommu->qi;
        int wait_index = (index + 1) % QI_LENGTH;

        if (qi->desc_status[wait_index] == QI_ABORT)
                return -EAGAIN;

        fault = readl(iommu->reg + DMAR_FSTS_REG);

        /*
         * If IQE happens, the head points to the descriptor associated
         * with the error. No new descriptors are fetched until the IQE
         * is cleared.
         */
        if (fault & DMA_FSTS_IQE) {
                head = readl(iommu->reg + DMAR_IQH_REG);
                if ((head >> DMAR_IQ_SHIFT) == index) {
                        printk(KERN_ERR "VT-d detected invalid descriptor: "
                                "low=%llx, high=%llx\n",
                                (unsigned long long)qi->desc[index].low,
                                (unsigned long long)qi->desc[index].high);
                        memcpy(&qi->desc[index], &qi->desc[wait_index],
                                        sizeof(struct qi_desc));
                        __iommu_flush_cache(iommu, &qi->desc[index],
                                        sizeof(struct qi_desc));
                        writel(DMA_FSTS_IQE, iommu->reg + DMAR_FSTS_REG);
                        return -EINVAL;
                }
        }

        /*
         * If ITE happens, all pending wait_desc commands are aborted.
         * No new descriptors are fetched until the ITE is cleared.
         */
        if (fault & DMA_FSTS_ITE) {
                head = readl(iommu->reg + DMAR_IQH_REG);
                head = ((head >> DMAR_IQ_SHIFT) - 1 + QI_LENGTH) % QI_LENGTH;
                head |= 1;
                tail = readl(iommu->reg + DMAR_IQT_REG);
                tail = ((tail >> DMAR_IQ_SHIFT) - 1 + QI_LENGTH) % QI_LENGTH;

                writel(DMA_FSTS_ITE, iommu->reg + DMAR_FSTS_REG);

                do {
                        if (qi->desc_status[head] == QI_IN_USE)
                                qi->desc_status[head] = QI_ABORT;
                        head = (head - 2 + QI_LENGTH) % QI_LENGTH;
                } while (head != tail);

                if (qi->desc_status[wait_index] == QI_ABORT)
                        return -EAGAIN;
        }

        if (fault & DMA_FSTS_ICE)
                writel(DMA_FSTS_ICE, iommu->reg + DMAR_FSTS_REG);

        return 0;
}

/*
 * Submit the queued invalidation descriptor to the remapping
 * hardware unit and wait for its completion.
 */
int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu)
{
        int rc;
        struct q_inval *qi = iommu->qi;
        struct qi_desc *hw, wait_desc;
        int wait_index, index;
        unsigned long flags;

        if (!qi)
                return 0;

        hw = qi->desc;

restart:
        rc = 0;

        raw_spin_lock_irqsave(&qi->q_lock, flags);
        while (qi->free_cnt < 3) {
                raw_spin_unlock_irqrestore(&qi->q_lock, flags);
                cpu_relax();
                raw_spin_lock_irqsave(&qi->q_lock, flags);
        }

        index = qi->free_head;
        wait_index = (index + 1) % QI_LENGTH;

        qi->desc_status[index] = qi->desc_status[wait_index] = QI_IN_USE;

        hw[index] = *desc;

        wait_desc.low = QI_IWD_STATUS_DATA(QI_DONE) |
                        QI_IWD_STATUS_WRITE | QI_IWD_TYPE;
        wait_desc.high = virt_to_phys(&qi->desc_status[wait_index]);

        hw[wait_index] = wait_desc;

        __iommu_flush_cache(iommu, &hw[index], sizeof(struct qi_desc));
        __iommu_flush_cache(iommu, &hw[wait_index], sizeof(struct qi_desc));

        qi->free_head = (qi->free_head + 2) % QI_LENGTH;
        qi->free_cnt -= 2;

        /*
         * update the HW tail register indicating the presence of
         * new descriptors.
         */
        writel(qi->free_head << DMAR_IQ_SHIFT, iommu->reg + DMAR_IQT_REG);

        while (qi->desc_status[wait_index] != QI_DONE) {
                /*
                 * We will leave the interrupts disabled, to prevent the
                 * interrupt context from queueing another cmd while a cmd
                 * is already submitted and waiting for completion on this
                 * cpu. This is to avoid a deadlock where the interrupt
                 * context can wait indefinitely for free slots in the queue.
                 */
                rc = qi_check_fault(iommu, index);
                if (rc)
                        break;

                raw_spin_unlock(&qi->q_lock);
                cpu_relax();
                raw_spin_lock(&qi->q_lock);
        }

        qi->desc_status[index] = QI_DONE;

        reclaim_free_desc(qi);
        raw_spin_unlock_irqrestore(&qi->q_lock, flags);

        if (rc == -EAGAIN)
                goto restart;

        return rc;
}

/*
 * Flush the global interrupt entry cache.
 */
void qi_global_iec(struct intel_iommu *iommu)
{
        struct qi_desc desc;

        desc.low = QI_IEC_TYPE;
        desc.high = 0;

        /* should never fail */
        qi_submit_sync(&desc, iommu);
}

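/* Flush context-cache entries for the given domain/source-id/function mask. */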
void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm,
                      u64 type)
{
        struct qi_desc desc;

        desc.low = QI_CC_FM(fm) | QI_CC_SID(sid) | QI_CC_DID(did)
                        | QI_CC_GRAN(type) | QI_CC_TYPE;
        desc.high = 0;

        qi_submit_sync(&desc, iommu);
}

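/*
 * Flush IOTLB entries for domain @did.  @addr/@size_order select a range
 * of 2^size_order pages; read/write drain bits are set when the hardware
 * advertises draining support.
 */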
void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
                    unsigned int size_order, u64 type)
{
        u8 dw = 0, dr = 0;

        struct qi_desc desc;
        int ih = 0;

        if (cap_write_drain(iommu->cap))
                dw = 1;

        if (cap_read_drain(iommu->cap))
                dr = 1;

        desc.low = QI_IOTLB_DID(did) | QI_IOTLB_DR(dr) | QI_IOTLB_DW(dw)
                | QI_IOTLB_GRAN(type) | QI_IOTLB_TYPE;
        desc.high = QI_IOTLB_ADDR(addr) | QI_IOTLB_IH(ih)
                | QI_IOTLB_AM(size_order);

        qi_submit_sync(&desc, iommu);
}

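/*
 * Flush the ATS translation cache of device @sid (invalidation queue
 * depth @qdep) for the 2^@mask-page range starting at @addr.
 */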
void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 qdep,
                        u64 addr, unsigned mask)
{
        struct qi_desc desc;

        if (mask) {
                BUG_ON(addr & ((1 << (VTD_PAGE_SHIFT + mask)) - 1));
                addr |= (1 << (VTD_PAGE_SHIFT + mask - 1)) - 1;
                desc.high = QI_DEV_IOTLB_ADDR(addr) | QI_DEV_IOTLB_SIZE;
        } else
                desc.high = QI_DEV_IOTLB_ADDR(addr);

        if (qdep >= QI_DEV_IOTLB_MAX_INVS)
                qdep = 0;

        desc.low = QI_DEV_IOTLB_SID(sid) | QI_DEV_IOTLB_QDEP(qdep) |
                   QI_DIOTLB_TYPE;

        qi_submit_sync(&desc, iommu);
}

/*
 * Disable Queued Invalidation interface.
 */
void dmar_disable_qi(struct intel_iommu *iommu)
{
        unsigned long flags;
        u32 sts;
        cycles_t start_time = get_cycles();

        if (!ecap_qis(iommu->ecap))
                return;

        raw_spin_lock_irqsave(&iommu->register_lock, flags);

        sts = dmar_readq(iommu->reg + DMAR_GSTS_REG);
        if (!(sts & DMA_GSTS_QIES))
                goto end;

        /*
         * Give a chance to HW to complete the pending invalidation requests.
         */
        while ((readl(iommu->reg + DMAR_IQT_REG) !=
                readl(iommu->reg + DMAR_IQH_REG)) &&
                (DMAR_OPERATION_TIMEOUT > (get_cycles() - start_time)))
                cpu_relax();

        iommu->gcmd &= ~DMA_GCMD_QIE;
        writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);

        IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl,
                      !(sts & DMA_GSTS_QIES), sts);
end:
        raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
}

/*
 * Enable queued invalidation.
 */
static void __dmar_enable_qi(struct intel_iommu *iommu)
{
        u32 sts;
        unsigned long flags;
        struct q_inval *qi = iommu->qi;

        qi->free_head = qi->free_tail = 0;
        qi->free_cnt = QI_LENGTH;

        raw_spin_lock_irqsave(&iommu->register_lock, flags);

        /* write zero to the tail reg */
        writel(0, iommu->reg + DMAR_IQT_REG);

        dmar_writeq(iommu->reg + DMAR_IQA_REG, virt_to_phys(qi->desc));

        iommu->gcmd |= DMA_GCMD_QIE;
        writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);

        /* Make sure hardware complete it */
        IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, (sts & DMA_GSTS_QIES), sts);

        raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
}

/*
 * Enable Queued Invalidation interface. This is a must to support
 * interrupt-remapping. Also used by DMA-remapping, which replaces
 * register based IOTLB invalidation.
 */
int dmar_enable_qi(struct intel_iommu *iommu)
{
        struct q_inval *qi;
        struct page *desc_page;

        if (!ecap_qis(iommu->ecap))
                return -ENOENT;

        /*
         * queued invalidation is already setup and enabled.
         */
        if (iommu->qi)
                return 0;

        iommu->qi = kmalloc(sizeof(*qi), GFP_ATOMIC);
        if (!iommu->qi)
                return -ENOMEM;

        qi = iommu->qi;

        desc_page = alloc_pages_node(iommu->node, GFP_ATOMIC | __GFP_ZERO, 0);
        if (!desc_page) {
                kfree(qi);
                iommu->qi = NULL;
                return -ENOMEM;
        }

        qi->desc = page_address(desc_page);

        qi->desc_status = kmalloc(QI_LENGTH * sizeof(int), GFP_ATOMIC);
        if (!qi->desc_status) {
                free_page((unsigned long) qi->desc);
                kfree(qi);
                iommu->qi = NULL;
                return -ENOMEM;
        }

        qi->free_head = qi->free_tail = 0;
        qi->free_cnt = QI_LENGTH;

        raw_spin_lock_init(&qi->q_lock);

        __dmar_enable_qi(iommu);

        return 0;
}

/* iommu interrupt handling. Most of it is MSI-like. */

enum faulttype {
        DMA_REMAP,
        INTR_REMAP,
        UNKNOWN,
};

static const char *dma_remap_fault_reasons[] =
{
        "Software",
        "Present bit in root entry is clear",
        "Present bit in context entry is clear",
        "Invalid context entry",
        "Access beyond MGAW",
        "PTE Write access is not set",
        "PTE Read access is not set",
        "Next page table ptr is invalid",
        "Root table address invalid",
        "Context table ptr is invalid",
        "non-zero reserved fields in RTP",
        "non-zero reserved fields in CTP",
        "non-zero reserved fields in PTE",
};

static const char *intr_remap_fault_reasons[] =
{
        "Detected reserved fields in the decoded interrupt-remapped request",
        "Interrupt index exceeded the interrupt-remapping table size",
        "Present field in the IRTE entry is clear",
        "Error accessing interrupt-remapping table pointed by IRTA_REG",
        "Detected reserved fields in the IRTE entry",
        "Blocked a compatibility format interrupt request",
        "Blocked an interrupt request due to source-id verification failure",
};

#define MAX_FAULT_REASON_IDX    (ARRAY_SIZE(fault_reason_strings) - 1)

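/*
 * Translate a hardware fault reason code into a string and classify it
 * as a DMA-remapping or interrupt-remapping fault.
 */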
const char *dmar_get_fault_reason(u8 fault_reason, int *fault_type)
{
        if (fault_reason >= 0x20 && (fault_reason < 0x20 +
                                     ARRAY_SIZE(intr_remap_fault_reasons))) {
                *fault_type = INTR_REMAP;
                return intr_remap_fault_reasons[fault_reason - 0x20];
        } else if (fault_reason < ARRAY_SIZE(dma_remap_fault_reasons)) {
                *fault_type = DMA_REMAP;
                return dma_remap_fault_reasons[fault_reason];
        } else {
                *fault_type = UNKNOWN;
                return "Unknown";
        }
}

void dmar_msi_unmask(struct irq_data *data)
{
        struct intel_iommu *iommu = irq_data_get_irq_handler_data(data);
        unsigned long flag;

        /* unmask it */
        raw_spin_lock_irqsave(&iommu->register_lock, flag);
        writel(0, iommu->reg + DMAR_FECTL_REG);
        /* Read a reg to force flush the post write */
        readl(iommu->reg + DMAR_FECTL_REG);
        raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}

void dmar_msi_mask(struct irq_data *data)
{
        unsigned long flag;
        struct intel_iommu *iommu = irq_data_get_irq_handler_data(data);

        /* mask it */
        raw_spin_lock_irqsave(&iommu->register_lock, flag);
        writel(DMA_FECTL_IM, iommu->reg + DMAR_FECTL_REG);
        /* Read a reg to force flush the post write */
        readl(iommu->reg + DMAR_FECTL_REG);
        raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}

void dmar_msi_write(int irq, struct msi_msg *msg)
{
        struct intel_iommu *iommu = irq_get_handler_data(irq);
        unsigned long flag;

        raw_spin_lock_irqsave(&iommu->register_lock, flag);
        writel(msg->data, iommu->reg + DMAR_FEDATA_REG);
        writel(msg->address_lo, iommu->reg + DMAR_FEADDR_REG);
        writel(msg->address_hi, iommu->reg + DMAR_FEUADDR_REG);
        raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}

void dmar_msi_read(int irq, struct msi_msg *msg)
{
        struct intel_iommu *iommu = irq_get_handler_data(irq);
        unsigned long flag;

        raw_spin_lock_irqsave(&iommu->register_lock, flag);
        msg->data = readl(iommu->reg + DMAR_FEDATA_REG);
        msg->address_lo = readl(iommu->reg + DMAR_FEADDR_REG);
        msg->address_hi = readl(iommu->reg + DMAR_FEUADDR_REG);
        raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}

static int dmar_fault_do_one(struct intel_iommu *iommu, int type,
                u8 fault_reason, u16 source_id, unsigned long long addr)
{
        const char *reason;
        int fault_type;

        reason = dmar_get_fault_reason(fault_reason, &fault_type);

        if (fault_type == INTR_REMAP)
                printk(KERN_ERR "INTR-REMAP: Request device [%02x:%02x.%d] "
                       "fault index %llx\n"
                        "INTR-REMAP:[fault reason %02d] %s\n",
                        (source_id >> 8), PCI_SLOT(source_id & 0xFF),
                        PCI_FUNC(source_id & 0xFF), addr >> 48,
                        fault_reason, reason);
        else
                printk(KERN_ERR
                       "DMAR:[%s] Request device [%02x:%02x.%d] "
                       "fault addr %llx\n"
                       "DMAR:[fault reason %02d] %s\n",
                       (type ? "DMA Read" : "DMA Write"),
                       (source_id >> 8), PCI_SLOT(source_id & 0xFF),
                       PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason);
        return 0;
}

#define PRIMARY_FAULT_REG_LEN (16)
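/*
 * Fault interrupt handler: walk the primary fault recording registers,
 * log and clear each recorded fault, then clear the summary bits in the
 * fault status register.
 */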
irqreturn_t dmar_fault(int irq, void *dev_id)
{
        struct intel_iommu *iommu = dev_id;
        int reg, fault_index;
        u32 fault_status;
        unsigned long flag;

        raw_spin_lock_irqsave(&iommu->register_lock, flag);
        fault_status = readl(iommu->reg + DMAR_FSTS_REG);
        if (fault_status)
                printk(KERN_ERR "DRHD: handling fault status reg %x\n",
                       fault_status);

        /* TBD: ignore advanced fault log currently */
        if (!(fault_status & DMA_FSTS_PPF))
                goto clear_rest;

        fault_index = dma_fsts_fault_record_index(fault_status);
        reg = cap_fault_reg_offset(iommu->cap);
        while (1) {
                u8 fault_reason;
                u16 source_id;
                u64 guest_addr;
                int type;
                u32 data;

                /* highest 32 bits */
                data = readl(iommu->reg + reg +
                                fault_index * PRIMARY_FAULT_REG_LEN + 12);
                if (!(data & DMA_FRCD_F))
                        break;

                fault_reason = dma_frcd_fault_reason(data);
                type = dma_frcd_type(data);

                data = readl(iommu->reg + reg +
                                fault_index * PRIMARY_FAULT_REG_LEN + 8);
                source_id = dma_frcd_source_id(data);

                guest_addr = dmar_readq(iommu->reg + reg +
                                fault_index * PRIMARY_FAULT_REG_LEN);
                guest_addr = dma_frcd_page_addr(guest_addr);
                /* clear the fault */
                writel(DMA_FRCD_F, iommu->reg + reg +
                        fault_index * PRIMARY_FAULT_REG_LEN + 12);

                raw_spin_unlock_irqrestore(&iommu->register_lock, flag);

                dmar_fault_do_one(iommu, type, fault_reason,
                                source_id, guest_addr);

                fault_index++;
                if (fault_index >= cap_num_fault_regs(iommu->cap))
                        fault_index = 0;
                raw_spin_lock_irqsave(&iommu->register_lock, flag);
        }
clear_rest:
        /* clear all the other faults */
        fault_status = readl(iommu->reg + DMAR_FSTS_REG);
        writel(fault_status, iommu->reg + DMAR_FSTS_REG);

        raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
        return IRQ_HANDLED;
}

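/*
 * Allocate and wire up the fault-reporting MSI for an IOMMU.  Safe to
 * call more than once; an already initialized interrupt is kept.
 */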
int dmar_set_interrupt(struct intel_iommu *iommu)
{
        int irq, ret;

        /*
         * Check if the fault interrupt is already initialized.
         */
        if (iommu->irq)
                return 0;

        irq = create_irq();
        if (!irq) {
                printk(KERN_ERR "IOMMU: no free vectors\n");
                return -EINVAL;
        }

        irq_set_handler_data(irq, iommu);
        iommu->irq = irq;

        ret = arch_setup_dmar_msi(irq);
        if (ret) {
                irq_set_handler_data(irq, NULL);
                iommu->irq = 0;
                destroy_irq(irq);
                return ret;
        }

        ret = request_irq(irq, dmar_fault, IRQF_NO_THREAD, iommu->name, iommu);
        if (ret)
                printk(KERN_ERR "IOMMU: can't request irq\n");
        return ret;
}

int __init enable_drhd_fault_handling(void)
{
        struct dmar_drhd_unit *drhd;

        /*
         * Enable fault control interrupt.
         */
        for_each_drhd_unit(drhd) {
                int ret;
                struct intel_iommu *iommu = drhd->iommu;
                ret = dmar_set_interrupt(iommu);

                if (ret) {
                        printk(KERN_ERR "DRHD %Lx: failed to enable fault "
                               "interrupt, ret %d\n",
                               (unsigned long long)drhd->reg_base_addr, ret);
                        return -1;
                }

                /*
                 * Clear any previous faults.
                 */
                dmar_fault(iommu->irq, iommu);
        }

        return 0;
}

/*
 * Re-enable Queued Invalidation interface.
 */
int dmar_reenable_qi(struct intel_iommu *iommu)
{
        if (!ecap_qis(iommu->ecap))
                return -ENOENT;

        if (!iommu->qi)
                return -ENOENT;

        /*
         * First disable queued invalidation.
         */
        dmar_disable_qi(iommu);
        /*
         * Then enable queued invalidation again. Since there are no pending
         * invalidation requests now, it's safe to re-enable queued
         * invalidation.
         */
        __dmar_enable_qi(iommu);

        return 0;
}

/*
 * Check interrupt remapping support in DMAR table description.
 */
int __init dmar_ir_support(void)
{
        struct acpi_table_dmar *dmar;

        dmar = (struct acpi_table_dmar *)dmar_tbl;
        if (!dmar)
                return 0;
        return dmar->flags & 0x1;
}
IOMMU_INIT_POST(detect_intel_iommu);