/*
 * VFIO core
 *
 * Copyright (C) 2012 Red Hat, Inc.  All rights reserved.
 *     Author: Alex Williamson <alex.williamson@redhat.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * Derived from original vfio:
 * Copyright 2010 Cisco Systems, Inc.  All rights reserved.
 * Author: Tom Lyon, pugs@cisco.com
 */

#include <linux/cdev.h>
#include <linux/compat.h>
#include <linux/device.h>
#include <linux/file.h>
#include <linux/anon_inodes.h>
#include <linux/fs.h>
#include <linux/idr.h>
#include <linux/iommu.h>
#include <linux/list.h>
#include <linux/miscdevice.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/pci.h>
#include <linux/rwsem.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/stat.h>
#include <linux/string.h>
#include <linux/uaccess.h>
#include <linux/vfio.h>
#include <linux/wait.h>

#define DRIVER_VERSION  "0.3"
#define DRIVER_AUTHOR   "Alex Williamson <alex.williamson@redhat.com>"
#define DRIVER_DESC     "VFIO - User Level meta-driver"

static struct vfio {
        struct class                    *class;
        struct list_head                iommu_drivers_list;
        struct mutex                    iommu_drivers_lock;
        struct list_head                group_list;
        struct idr                      group_idr;
        struct mutex                    group_lock;
        struct cdev                     group_cdev;
        dev_t                           group_devt;
        wait_queue_head_t               release_q;
} vfio;

struct vfio_iommu_driver {
        const struct vfio_iommu_driver_ops      *ops;
        struct list_head                        vfio_next;
};

struct vfio_container {
        struct kref                     kref;
        struct list_head                group_list;
        struct rw_semaphore             group_lock;
        struct vfio_iommu_driver        *iommu_driver;
        void                            *iommu_data;
        bool                            noiommu;
};

struct vfio_unbound_dev {
        struct device                   *dev;
        struct list_head                unbound_next;
};

struct vfio_group {
        struct kref                     kref;
        int                             minor;
        atomic_t                        container_users;
        struct iommu_group              *iommu_group;
        struct vfio_container           *container;
        struct list_head                device_list;
        struct mutex                    device_lock;
        struct device                   *dev;
        struct notifier_block           nb;
        struct list_head                vfio_next;
        struct list_head                container_next;
        struct list_head                unbound_list;
        struct mutex                    unbound_lock;
        atomic_t                        opened;
        bool                            noiommu;
        struct kvm                      *kvm;
        struct blocking_notifier_head   notifier;
};

struct vfio_device {
        struct kref                     kref;
        struct device                   *dev;
        const struct vfio_device_ops    *ops;
        struct vfio_group               *group;
        struct list_head                group_next;
        void                            *device_data;
};

#ifdef CONFIG_VFIO_NOIOMMU
static bool noiommu __read_mostly;
module_param_named(enable_unsafe_noiommu_mode,
                   noiommu, bool, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(enable_unsafe_noiommu_mode, "Enable UNSAFE, no-IOMMU mode.  This mode provides no device isolation, no DMA translation, no host kernel protection, cannot be used for device assignment to virtual machines, requires RAWIO permissions, and will taint the kernel.  If you do not know what this is for, step away. (default: false)");
#endif

/*
 * vfio_iommu_group_{get,put} are only intended for VFIO bus driver probe
 * and remove functions.  Any use cases other than acquiring the first
 * reference for the purpose of calling vfio_add_group_dev() or removing
 * that symmetric reference after vfio_del_group_dev() should use the raw
 * iommu_group_{get,put} functions.  In particular, vfio_iommu_group_put()
 * removes the device from the dummy group and cannot be nested.
 */
struct iommu_group *vfio_iommu_group_get(struct device *dev)
{
        struct iommu_group *group;
        int __maybe_unused ret;

        group = iommu_group_get(dev);

#ifdef CONFIG_VFIO_NOIOMMU
        /*
         * With noiommu enabled, an IOMMU group will be created for a device
         * that doesn't already have one and doesn't have an iommu_ops on its
         * bus.  We set iommudata simply to be able to identify these groups
         * as special use and for reclamation later.
         */
        if (group || !noiommu || iommu_present(dev->bus))
                return group;

        group = iommu_group_alloc();
        if (IS_ERR(group))
                return NULL;

        iommu_group_set_name(group, "vfio-noiommu");
        iommu_group_set_iommudata(group, &noiommu, NULL);
        ret = iommu_group_add_device(group, dev);
        iommu_group_put(group);
        if (ret)
                return NULL;

        /*
         * Where to taint?  At this point we've added an IOMMU group for a
         * device that is not backed by iommu_ops, therefore any iommu_
         * callback using iommu_ops can legitimately Oops.  So, while we may
         * be about to give a DMA capable device to a user without IOMMU
         * protection, which is clearly taint-worthy, let's go ahead and do
         * it here.
         */
        add_taint(TAINT_USER, LOCKDEP_STILL_OK);
        dev_warn(dev, "Adding kernel taint for vfio-noiommu group on device\n");
#endif

        return group;
}
EXPORT_SYMBOL_GPL(vfio_iommu_group_get);

void vfio_iommu_group_put(struct iommu_group *group, struct device *dev)
{
#ifdef CONFIG_VFIO_NOIOMMU
        if (iommu_group_get_iommudata(group) == &noiommu)
                iommu_group_remove_device(dev);
#endif

        iommu_group_put(group);
}
EXPORT_SYMBOL_GPL(vfio_iommu_group_put);
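
/*
 * Illustrative sketch (not part of the original file): per the comment
 * above, a VFIO bus driver brackets vfio_add_group_dev() and
 * vfio_del_group_dev() with these helpers in its probe/remove paths.
 * "my_ops" and "my_data" are hypothetical driver-private names:
 *
 *	probe:	group = vfio_iommu_group_get(dev);
 *		if (!group)
 *			return -EINVAL;
 *		ret = vfio_add_group_dev(dev, &my_ops, my_data);
 *		if (ret)
 *			vfio_iommu_group_put(group, dev);
 *
 *	remove:	my_data = vfio_del_group_dev(dev);
 *		vfio_iommu_group_put(dev->iommu_group, dev);
 */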

#ifdef CONFIG_VFIO_NOIOMMU
static void *vfio_noiommu_open(unsigned long arg)
{
        if (arg != VFIO_NOIOMMU_IOMMU)
                return ERR_PTR(-EINVAL);
        if (!capable(CAP_SYS_RAWIO))
                return ERR_PTR(-EPERM);

        return NULL;
}

static void vfio_noiommu_release(void *iommu_data)
{
}

static long vfio_noiommu_ioctl(void *iommu_data,
                               unsigned int cmd, unsigned long arg)
{
        if (cmd == VFIO_CHECK_EXTENSION)
                return noiommu && (arg == VFIO_NOIOMMU_IOMMU) ? 1 : 0;

        return -ENOTTY;
}

static int vfio_noiommu_attach_group(void *iommu_data,
                                     struct iommu_group *iommu_group)
{
        return iommu_group_get_iommudata(iommu_group) == &noiommu ? 0 : -EINVAL;
}

static void vfio_noiommu_detach_group(void *iommu_data,
                                      struct iommu_group *iommu_group)
{
}

static const struct vfio_iommu_driver_ops vfio_noiommu_ops = {
        .name = "vfio-noiommu",
        .owner = THIS_MODULE,
        .open = vfio_noiommu_open,
        .release = vfio_noiommu_release,
        .ioctl = vfio_noiommu_ioctl,
        .attach_group = vfio_noiommu_attach_group,
        .detach_group = vfio_noiommu_detach_group,
};
#endif


/**
 * IOMMU driver registration
 */
int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops)
{
        struct vfio_iommu_driver *driver, *tmp;

        driver = kzalloc(sizeof(*driver), GFP_KERNEL);
        if (!driver)
                return -ENOMEM;

        driver->ops = ops;

        mutex_lock(&vfio.iommu_drivers_lock);

        /* Check for duplicates */
        list_for_each_entry(tmp, &vfio.iommu_drivers_list, vfio_next) {
                if (tmp->ops == ops) {
                        mutex_unlock(&vfio.iommu_drivers_lock);
                        kfree(driver);
                        return -EINVAL;
                }
        }

        list_add(&driver->vfio_next, &vfio.iommu_drivers_list);

        mutex_unlock(&vfio.iommu_drivers_lock);

        return 0;
}
EXPORT_SYMBOL_GPL(vfio_register_iommu_driver);

void vfio_unregister_iommu_driver(const struct vfio_iommu_driver_ops *ops)
{
        struct vfio_iommu_driver *driver;

        mutex_lock(&vfio.iommu_drivers_lock);
        list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) {
                if (driver->ops == ops) {
                        list_del(&driver->vfio_next);
                        mutex_unlock(&vfio.iommu_drivers_lock);
                        kfree(driver);
                        return;
                }
        }
        mutex_unlock(&vfio.iommu_drivers_lock);
}
EXPORT_SYMBOL_GPL(vfio_unregister_iommu_driver);
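
/*
 * Illustrative sketch (not part of the original file): an IOMMU backend
 * (e.g. vfio_iommu_type1) registers its vfio_iommu_driver_ops at module
 * init and unregisters on exit; "my_iommu_ops" is a hypothetical name:
 *
 *	static int __init my_init(void)
 *	{
 *		return vfio_register_iommu_driver(&my_iommu_ops);
 *	}
 *
 *	static void __exit my_exit(void)
 *	{
 *		vfio_unregister_iommu_driver(&my_iommu_ops);
 *	}
 */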

/**
 * Group minor allocation/free - both called with vfio.group_lock held
 */
static int vfio_alloc_group_minor(struct vfio_group *group)
{
        return idr_alloc(&vfio.group_idr, group, 0, MINORMASK + 1, GFP_KERNEL);
}

static void vfio_free_group_minor(int minor)
{
        idr_remove(&vfio.group_idr, minor);
}

static int vfio_iommu_group_notifier(struct notifier_block *nb,
                                     unsigned long action, void *data);
static void vfio_group_get(struct vfio_group *group);

/**
 * Container objects - containers are created when /dev/vfio/vfio is
 * opened, but their lifecycle extends until the last user is done, so
 * they're freed via kref.  Must support container/group/device being
 * closed in any order.
 */
static void vfio_container_get(struct vfio_container *container)
{
        kref_get(&container->kref);
}

static void vfio_container_release(struct kref *kref)
{
        struct vfio_container *container;
        container = container_of(kref, struct vfio_container, kref);

        kfree(container);
}

static void vfio_container_put(struct vfio_container *container)
{
        kref_put(&container->kref, vfio_container_release);
}

static void vfio_group_unlock_and_free(struct vfio_group *group)
{
        mutex_unlock(&vfio.group_lock);
        /*
         * Unregister outside of lock.  A spurious callback is harmless now
         * that the group is no longer in vfio.group_list.
         */
        iommu_group_unregister_notifier(group->iommu_group, &group->nb);
        kfree(group);
}

/**
 * Group objects - create, release, get, put, search
 */
static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group)
{
        struct vfio_group *group, *tmp;
        struct device *dev;
        int ret, minor;

        group = kzalloc(sizeof(*group), GFP_KERNEL);
        if (!group)
                return ERR_PTR(-ENOMEM);

        kref_init(&group->kref);
        INIT_LIST_HEAD(&group->device_list);
        mutex_init(&group->device_lock);
        INIT_LIST_HEAD(&group->unbound_list);
        mutex_init(&group->unbound_lock);
        atomic_set(&group->container_users, 0);
        atomic_set(&group->opened, 0);
        group->iommu_group = iommu_group;
#ifdef CONFIG_VFIO_NOIOMMU
        group->noiommu = (iommu_group_get_iommudata(iommu_group) == &noiommu);
#endif
        BLOCKING_INIT_NOTIFIER_HEAD(&group->notifier);

        group->nb.notifier_call = vfio_iommu_group_notifier;

        /*
         * Blocking notifiers acquire a rwsem around registering and hold
         * it around the callback.  Therefore, we need to register outside
         * of vfio.group_lock to avoid A-B/B-A contention.  Our callback
         * won't do anything unless it can find the group in vfio.group_list,
         * so there's no harm in registering early.
         */
        ret = iommu_group_register_notifier(iommu_group, &group->nb);
        if (ret) {
                kfree(group);
                return ERR_PTR(ret);
        }

        mutex_lock(&vfio.group_lock);

        /* Did we race creating this group? */
        list_for_each_entry(tmp, &vfio.group_list, vfio_next) {
                if (tmp->iommu_group == iommu_group) {
                        vfio_group_get(tmp);
                        vfio_group_unlock_and_free(group);
                        return tmp;
                }
        }

        minor = vfio_alloc_group_minor(group);
        if (minor < 0) {
                vfio_group_unlock_and_free(group);
                return ERR_PTR(minor);
        }

        dev = device_create(vfio.class, NULL,
                            MKDEV(MAJOR(vfio.group_devt), minor),
                            group, "%s%d", group->noiommu ? "noiommu-" : "",
                            iommu_group_id(iommu_group));
        if (IS_ERR(dev)) {
                vfio_free_group_minor(minor);
                vfio_group_unlock_and_free(group);
                return ERR_CAST(dev);
        }

        group->minor = minor;
        group->dev = dev;

        list_add(&group->vfio_next, &vfio.group_list);

        mutex_unlock(&vfio.group_lock);

        return group;
}
397
Al Viro6d2cd3c2012-08-17 21:27:32 -0400398/* called with vfio.group_lock held */
Alex Williamsoncba33452012-07-31 08:16:22 -0600399static void vfio_group_release(struct kref *kref)
400{
401 struct vfio_group *group = container_of(kref, struct vfio_group, kref);
Alex Williamson60720a02015-02-06 15:05:06 -0700402 struct vfio_unbound_dev *unbound, *tmp;
Alex Williamson4a688102015-02-06 15:05:06 -0700403 struct iommu_group *iommu_group = group->iommu_group;
Alex Williamsoncba33452012-07-31 08:16:22 -0600404
405 WARN_ON(!list_empty(&group->device_list));
Alex Williamson65b1ade2017-03-21 13:19:09 -0600406 WARN_ON(group->notifier.head);
Alex Williamsoncba33452012-07-31 08:16:22 -0600407
Alex Williamson60720a02015-02-06 15:05:06 -0700408 list_for_each_entry_safe(unbound, tmp,
409 &group->unbound_list, unbound_next) {
410 list_del(&unbound->unbound_next);
411 kfree(unbound);
412 }
413
Alex Williamsond1099902013-12-19 10:17:13 -0700414 device_destroy(vfio.class, MKDEV(MAJOR(vfio.group_devt), group->minor));
Alex Williamsoncba33452012-07-31 08:16:22 -0600415 list_del(&group->vfio_next);
416 vfio_free_group_minor(group->minor);
Jiang Liu9df7b252012-12-07 13:43:50 -0700417 vfio_group_unlock_and_free(group);
Alex Williamson4a688102015-02-06 15:05:06 -0700418 iommu_group_put(iommu_group);
Alex Williamsoncba33452012-07-31 08:16:22 -0600419}
420
421static void vfio_group_put(struct vfio_group *group)
422{
Al Viro6d2cd3c2012-08-17 21:27:32 -0400423 kref_put_mutex(&group->kref, vfio_group_release, &vfio.group_lock);
Alex Williamsoncba33452012-07-31 08:16:22 -0600424}
425
426/* Assume group_lock or group reference is held */
427static void vfio_group_get(struct vfio_group *group)
428{
429 kref_get(&group->kref);
430}
431
432/*
433 * Not really a try as we will sleep for mutex, but we need to make
434 * sure the group pointer is valid under lock and get a reference.
435 */
436static struct vfio_group *vfio_group_try_get(struct vfio_group *group)
437{
438 struct vfio_group *target = group;
439
440 mutex_lock(&vfio.group_lock);
441 list_for_each_entry(group, &vfio.group_list, vfio_next) {
442 if (group == target) {
443 vfio_group_get(group);
444 mutex_unlock(&vfio.group_lock);
445 return group;
446 }
447 }
448 mutex_unlock(&vfio.group_lock);
449
450 return NULL;
451}
452
453static
454struct vfio_group *vfio_group_get_from_iommu(struct iommu_group *iommu_group)
455{
456 struct vfio_group *group;
457
458 mutex_lock(&vfio.group_lock);
459 list_for_each_entry(group, &vfio.group_list, vfio_next) {
460 if (group->iommu_group == iommu_group) {
461 vfio_group_get(group);
462 mutex_unlock(&vfio.group_lock);
463 return group;
464 }
465 }
466 mutex_unlock(&vfio.group_lock);
467
468 return NULL;
469}
470
471static struct vfio_group *vfio_group_get_from_minor(int minor)
472{
473 struct vfio_group *group;
474
475 mutex_lock(&vfio.group_lock);
476 group = idr_find(&vfio.group_idr, minor);
477 if (!group) {
478 mutex_unlock(&vfio.group_lock);
479 return NULL;
480 }
481 vfio_group_get(group);
482 mutex_unlock(&vfio.group_lock);
483
484 return group;
485}
486
Kirti Wankhede7ed3ea82016-11-17 02:16:15 +0530487static struct vfio_group *vfio_group_get_from_dev(struct device *dev)
488{
489 struct iommu_group *iommu_group;
490 struct vfio_group *group;
491
492 iommu_group = iommu_group_get(dev);
493 if (!iommu_group)
494 return NULL;
495
496 group = vfio_group_get_from_iommu(iommu_group);
497 iommu_group_put(iommu_group);
498
499 return group;
500}
501
/**
 * Device objects - create, release, get, put, search
 */
static
struct vfio_device *vfio_group_create_device(struct vfio_group *group,
                                             struct device *dev,
                                             const struct vfio_device_ops *ops,
                                             void *device_data)
{
        struct vfio_device *device;

        device = kzalloc(sizeof(*device), GFP_KERNEL);
        if (!device)
                return ERR_PTR(-ENOMEM);

        kref_init(&device->kref);
        device->dev = dev;
        device->group = group;
        device->ops = ops;
        device->device_data = device_data;
        dev_set_drvdata(dev, device);

        /* No need to get group_lock, caller has group reference */
        vfio_group_get(group);

        mutex_lock(&group->device_lock);
        list_add(&device->group_next, &group->device_list);
        mutex_unlock(&group->device_lock);

        return device;
}

static void vfio_device_release(struct kref *kref)
{
        struct vfio_device *device = container_of(kref,
                                                  struct vfio_device, kref);
        struct vfio_group *group = device->group;

        list_del(&device->group_next);
        mutex_unlock(&group->device_lock);

        dev_set_drvdata(device->dev, NULL);

        kfree(device);

        /* vfio_del_group_dev may be waiting for this device */
        wake_up(&vfio.release_q);
}

/* Device reference always implies a group reference */
void vfio_device_put(struct vfio_device *device)
{
        struct vfio_group *group = device->group;
        kref_put_mutex(&device->kref, vfio_device_release, &group->device_lock);
        vfio_group_put(group);
}
EXPORT_SYMBOL_GPL(vfio_device_put);

static void vfio_device_get(struct vfio_device *device)
{
        vfio_group_get(device->group);
        kref_get(&device->kref);
}

static struct vfio_device *vfio_group_get_device(struct vfio_group *group,
                                                 struct device *dev)
{
        struct vfio_device *device;

        mutex_lock(&group->device_lock);
        list_for_each_entry(device, &group->device_list, group_next) {
                if (device->dev == dev) {
                        vfio_device_get(device);
                        mutex_unlock(&group->device_lock);
                        return device;
                }
        }
        mutex_unlock(&group->device_lock);
        return NULL;
}

/*
 * Some drivers, like pci-stub, are only used to prevent other drivers from
 * claiming a device and are therefore perfectly legitimate for a user owned
 * group.  The pci-stub driver has no dependencies on DMA or the IOVA mapping
 * of the device, but it does prevent the user from having direct access to
 * the device, which is useful in some circumstances.
 *
 * We also assume that we can include PCI interconnect devices, ie. bridges.
 * IOMMU grouping on PCI necessitates that if we lack isolation on a bridge
 * then all of the downstream devices will be part of the same IOMMU group as
 * the bridge.  Thus, if placing the bridge into the user owned IOVA space
 * breaks anything, it only does so for user owned devices downstream.  Note
 * that error notification via MSI can be affected for platforms that handle
 * MSI within the same IOVA space as DMA.
 */
static const char * const vfio_driver_whitelist[] = { "pci-stub" };

static bool vfio_dev_whitelisted(struct device *dev, struct device_driver *drv)
{
        int i;

        if (dev_is_pci(dev)) {
                struct pci_dev *pdev = to_pci_dev(dev);

                if (pdev->hdr_type != PCI_HEADER_TYPE_NORMAL)
                        return true;
        }

        for (i = 0; i < ARRAY_SIZE(vfio_driver_whitelist); i++) {
                if (!strcmp(drv->name, vfio_driver_whitelist[i]))
                        return true;
        }

        return false;
}

/*
 * A vfio group is viable for use by userspace if all devices are in
 * one of the following states:
 *  - driver-less
 *  - bound to a vfio driver
 *  - bound to a whitelisted driver
 *  - a PCI interconnect device
 *
 * We use two methods to determine whether a device is bound to a vfio
 * driver.  The first is to test whether the device exists in the vfio
 * group.  The second is to test if the device exists on the group
 * unbound_list, indicating it's in the middle of transitioning from
 * a vfio driver to driver-less.
 */
static int vfio_dev_viable(struct device *dev, void *data)
{
        struct vfio_group *group = data;
        struct vfio_device *device;
        struct device_driver *drv = ACCESS_ONCE(dev->driver);
        struct vfio_unbound_dev *unbound;
        int ret = -EINVAL;

        mutex_lock(&group->unbound_lock);
        list_for_each_entry(unbound, &group->unbound_list, unbound_next) {
                if (dev == unbound->dev) {
                        ret = 0;
                        break;
                }
        }
        mutex_unlock(&group->unbound_lock);

        if (!ret || !drv || vfio_dev_whitelisted(dev, drv))
                return 0;

        device = vfio_group_get_device(group, dev);
        if (device) {
                vfio_device_put(device);
                return 0;
        }

        return ret;
}

/**
 * Async device support
 */
static int vfio_group_nb_add_dev(struct vfio_group *group, struct device *dev)
{
        struct vfio_device *device;

        /* Do we already know about it?  We shouldn't */
        device = vfio_group_get_device(group, dev);
        if (WARN_ON_ONCE(device)) {
                vfio_device_put(device);
                return 0;
        }

        /* Nothing to do for idle groups */
        if (!atomic_read(&group->container_users))
                return 0;

        /* TODO Prevent device auto probing */
        WARN(1, "Device %s added to live group %d!\n", dev_name(dev),
             iommu_group_id(group->iommu_group));

        return 0;
}

static int vfio_group_nb_verify(struct vfio_group *group, struct device *dev)
{
        /* We don't care what happens when the group isn't in use */
        if (!atomic_read(&group->container_users))
                return 0;

        return vfio_dev_viable(dev, group);
}

static int vfio_iommu_group_notifier(struct notifier_block *nb,
                                     unsigned long action, void *data)
{
        struct vfio_group *group = container_of(nb, struct vfio_group, nb);
        struct device *dev = data;
        struct vfio_unbound_dev *unbound;

        /*
         * Need to go through a group_lock lookup to get a reference or we
         * risk racing a group being removed.  Ignore spurious notifies.
         */
        group = vfio_group_try_get(group);
        if (!group)
                return NOTIFY_OK;

        switch (action) {
        case IOMMU_GROUP_NOTIFY_ADD_DEVICE:
                vfio_group_nb_add_dev(group, dev);
                break;
        case IOMMU_GROUP_NOTIFY_DEL_DEVICE:
                /*
                 * Nothing to do here.  If the device is in use, then the
                 * vfio sub-driver should block the remove callback until
                 * it is unused.  If the device is unused or attached to a
                 * stub driver, then it should be released and we don't
                 * care that it will be going away.
                 */
                break;
        case IOMMU_GROUP_NOTIFY_BIND_DRIVER:
                pr_debug("%s: Device %s, group %d binding to driver\n",
                         __func__, dev_name(dev),
                         iommu_group_id(group->iommu_group));
                break;
        case IOMMU_GROUP_NOTIFY_BOUND_DRIVER:
                pr_debug("%s: Device %s, group %d bound to driver %s\n",
                         __func__, dev_name(dev),
                         iommu_group_id(group->iommu_group), dev->driver->name);
                BUG_ON(vfio_group_nb_verify(group, dev));
                break;
        case IOMMU_GROUP_NOTIFY_UNBIND_DRIVER:
                pr_debug("%s: Device %s, group %d unbinding from driver %s\n",
                         __func__, dev_name(dev),
                         iommu_group_id(group->iommu_group), dev->driver->name);
                break;
        case IOMMU_GROUP_NOTIFY_UNBOUND_DRIVER:
                pr_debug("%s: Device %s, group %d unbound from driver\n",
                         __func__, dev_name(dev),
                         iommu_group_id(group->iommu_group));
                /*
                 * XXX An unbound device in a live group is ok, but we'd
                 * really like to avoid the above BUG_ON by preventing other
                 * drivers from binding to it.  Once that occurs, we have to
                 * stop the system to maintain isolation.  At a minimum, we'd
                 * want a toggle to disable driver auto probe for this device.
                 */

                mutex_lock(&group->unbound_lock);
                list_for_each_entry(unbound,
                                    &group->unbound_list, unbound_next) {
                        if (dev == unbound->dev) {
                                list_del(&unbound->unbound_next);
                                kfree(unbound);
                                break;
                        }
                }
                mutex_unlock(&group->unbound_lock);
                break;
        }

        vfio_group_put(group);
        return NOTIFY_OK;
}

/**
 * VFIO driver API
 */
int vfio_add_group_dev(struct device *dev,
                       const struct vfio_device_ops *ops, void *device_data)
{
        struct iommu_group *iommu_group;
        struct vfio_group *group;
        struct vfio_device *device;

        iommu_group = iommu_group_get(dev);
        if (!iommu_group)
                return -EINVAL;

        group = vfio_group_get_from_iommu(iommu_group);
        if (!group) {
                group = vfio_create_group(iommu_group);
                if (IS_ERR(group)) {
                        iommu_group_put(iommu_group);
                        return PTR_ERR(group);
                }
        } else {
                /*
                 * A found vfio_group already holds a reference to the
                 * iommu_group.  A created vfio_group keeps the reference.
                 */
                iommu_group_put(iommu_group);
        }

        device = vfio_group_get_device(group, dev);
        if (device) {
                WARN(1, "Device %s already exists on group %d\n",
                     dev_name(dev), iommu_group_id(iommu_group));
                vfio_device_put(device);
                vfio_group_put(group);
                return -EBUSY;
        }

        device = vfio_group_create_device(group, dev, ops, device_data);
        if (IS_ERR(device)) {
                vfio_group_put(group);
                return PTR_ERR(device);
        }

        /*
         * Drop all but the vfio_device reference.  The vfio_device holds
         * a reference to the vfio_group, which holds a reference to the
         * iommu_group.
         */
        vfio_group_put(group);

        return 0;
}
EXPORT_SYMBOL_GPL(vfio_add_group_dev);
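
/*
 * Illustrative sketch (not part of the original file): the ops passed to
 * vfio_add_group_dev() back the device file operations defined later in
 * this file; all "my_*" names below are hypothetical:
 *
 *	static const struct vfio_device_ops my_ops = {
 *		.name		= "my-vfio-driver",
 *		.open		= my_open,
 *		.release	= my_release,
 *		.ioctl		= my_ioctl,
 *		.read		= my_read,
 *		.write		= my_write,
 *		.mmap		= my_mmap,
 *		.request	= my_request,	(used by vfio_del_group_dev())
 *	};
 *
 *	err = vfio_add_group_dev(dev, &my_ops, my_data);
 */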

/**
 * Get a reference to the vfio_device for a device.  Even if the
 * caller thinks they own the device, they could be racing with a
 * release call path, so we can't trust drvdata for the shortcut.
 * Go the long way around, from the iommu_group to the vfio_group
 * to the vfio_device.
 */
struct vfio_device *vfio_device_get_from_dev(struct device *dev)
{
        struct vfio_group *group;
        struct vfio_device *device;

        group = vfio_group_get_from_dev(dev);
        if (!group)
                return NULL;

        device = vfio_group_get_device(group, dev);
        vfio_group_put(group);

        return device;
}
EXPORT_SYMBOL_GPL(vfio_device_get_from_dev);

static struct vfio_device *vfio_device_get_from_name(struct vfio_group *group,
                                                     char *buf)
{
        struct vfio_device *it, *device = NULL;

        mutex_lock(&group->device_lock);
        list_for_each_entry(it, &group->device_list, group_next) {
                if (!strcmp(dev_name(it->dev), buf)) {
                        device = it;
                        vfio_device_get(device);
                        break;
                }
        }
        mutex_unlock(&group->device_lock);

        return device;
}

/*
 * Caller must hold a reference to the vfio_device
 */
void *vfio_device_data(struct vfio_device *device)
{
        return device->device_data;
}
EXPORT_SYMBOL_GPL(vfio_device_data);

/* Given a referenced group, check if it contains the device */
static bool vfio_dev_present(struct vfio_group *group, struct device *dev)
{
        struct vfio_device *device;

        device = vfio_group_get_device(group, dev);
        if (!device)
                return false;

        vfio_device_put(device);
        return true;
}

/*
 * Decrement the device reference count and wait for the device to be
 * removed.  Open file descriptors for the device... */
void *vfio_del_group_dev(struct device *dev)
{
        struct vfio_device *device = dev_get_drvdata(dev);
        struct vfio_group *group = device->group;
        void *device_data = device->device_data;
        struct vfio_unbound_dev *unbound;
        unsigned int i = 0;
        long ret;
        bool interrupted = false;

        /*
         * The group exists so long as we have a device reference.  Get
         * a group reference and use it to scan for the device going away.
         */
        vfio_group_get(group);

        /*
         * When the device is removed from the group, the group suddenly
         * becomes non-viable; the device has a driver (until the unbind
         * completes), but it's not present in the group.  This is bad news
         * for any external users that need to re-acquire a group reference
         * in order to match and release their existing reference.  To
         * solve this, we track such devices on the unbound_list to bridge
         * the gap until they're fully unbound.
         */
        unbound = kzalloc(sizeof(*unbound), GFP_KERNEL);
        if (unbound) {
                unbound->dev = dev;
                mutex_lock(&group->unbound_lock);
                list_add(&unbound->unbound_next, &group->unbound_list);
                mutex_unlock(&group->unbound_lock);
        }
        WARN_ON(!unbound);

        vfio_device_put(device);

        /*
         * If the device is still present in the group after the above
         * 'put', then it is in use and we need to request it from the
         * bus driver.  The driver may in turn need to request the
         * device from the user.  We send the request on an arbitrary
         * interval with counter to allow the driver to take escalating
         * measures to release the device if it has the ability to do so.
         */
        do {
                device = vfio_group_get_device(group, dev);
                if (!device)
                        break;

                if (device->ops->request)
                        device->ops->request(device_data, i++);

                vfio_device_put(device);

                if (interrupted) {
                        ret = wait_event_timeout(vfio.release_q,
                                        !vfio_dev_present(group, dev), HZ * 10);
                } else {
                        ret = wait_event_interruptible_timeout(vfio.release_q,
                                        !vfio_dev_present(group, dev), HZ * 10);
                        if (ret == -ERESTARTSYS) {
                                interrupted = true;
                                dev_warn(dev,
                                         "Device is currently in use, task"
                                         " \"%s\" (%d) "
                                         "blocked until device is released",
                                         current->comm, task_pid_nr(current));
                        }
                }
        } while (ret <= 0);

        vfio_group_put(group);

        return device_data;
}
EXPORT_SYMBOL_GPL(vfio_del_group_dev);
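
/*
 * Illustrative sketch (not part of the original file): the escalating
 * count passed to ->request() above lets a bus driver notify the user
 * before resorting to louder measures.  A hypothetical implementation,
 * loosely modeled on vfio-pci's eventfd-based request handler:
 *
 *	static void my_request(void *device_data, unsigned int count)
 *	{
 *		struct my_device *mydev = device_data;
 *
 *		if (mydev->req_trigger)
 *			eventfd_signal(mydev->req_trigger, 1);
 *		else if (count > 10)
 *			dev_notice(mydev->dev, "no way to notify user\n");
 *	}
 */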

/**
 * VFIO base fd, /dev/vfio/vfio
 */
static long vfio_ioctl_check_extension(struct vfio_container *container,
                                       unsigned long arg)
{
        struct vfio_iommu_driver *driver;
        long ret = 0;

        down_read(&container->group_lock);

        driver = container->iommu_driver;

        switch (arg) {
                /* No base extensions yet */
        default:
                /*
                 * If no driver is set, poll all registered drivers for
                 * extensions and return the first positive result.  If
                 * a driver is already set, further queries will be passed
                 * only to that driver.
                 */
                if (!driver) {
                        mutex_lock(&vfio.iommu_drivers_lock);
                        list_for_each_entry(driver, &vfio.iommu_drivers_list,
                                            vfio_next) {

#ifdef CONFIG_VFIO_NOIOMMU
                                if (!list_empty(&container->group_list) &&
                                    (container->noiommu !=
                                     (driver->ops == &vfio_noiommu_ops)))
                                        continue;
#endif

                                if (!try_module_get(driver->ops->owner))
                                        continue;

                                ret = driver->ops->ioctl(NULL,
                                                         VFIO_CHECK_EXTENSION,
                                                         arg);
                                module_put(driver->ops->owner);
                                if (ret > 0)
                                        break;
                        }
                        mutex_unlock(&vfio.iommu_drivers_lock);
                } else
                        ret = driver->ops->ioctl(container->iommu_data,
                                                 VFIO_CHECK_EXTENSION, arg);
        }

        up_read(&container->group_lock);

        return ret;
}
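
/*
 * Illustrative userspace sketch (not part of the original file): because
 * unset containers poll all registered backends, extension checks work on
 * a freshly opened /dev/vfio/vfio fd (assuming vfio_iommu_type1 is built
 * in or loaded):
 *
 *	int container = open("/dev/vfio/vfio", O_RDWR);
 *
 *	if (ioctl(container, VFIO_GET_API_VERSION) != VFIO_API_VERSION)
 *		errx(1, "unknown VFIO API version");
 *	if (!ioctl(container, VFIO_CHECK_EXTENSION, VFIO_TYPE1_IOMMU))
 *		errx(1, "no type1 IOMMU support");
 */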

/* hold write lock on container->group_lock */
static int __vfio_container_attach_groups(struct vfio_container *container,
                                          struct vfio_iommu_driver *driver,
                                          void *data)
{
        struct vfio_group *group;
        int ret = -ENODEV;

        list_for_each_entry(group, &container->group_list, container_next) {
                ret = driver->ops->attach_group(data, group->iommu_group);
                if (ret)
                        goto unwind;
        }

        return ret;

unwind:
        list_for_each_entry_continue_reverse(group, &container->group_list,
                                             container_next) {
                driver->ops->detach_group(data, group->iommu_group);
        }

        return ret;
}

static long vfio_ioctl_set_iommu(struct vfio_container *container,
                                 unsigned long arg)
{
        struct vfio_iommu_driver *driver;
        long ret = -ENODEV;

        down_write(&container->group_lock);

        /*
         * The container is designed to be an unprivileged interface while
         * the group can be assigned to specific users.  Therefore, only by
         * adding a group to a container does the user get the privilege of
         * enabling the iommu, which may allocate finite resources.  There
         * is no unset_iommu, but by removing all the groups from a container,
         * the container is deprivileged and returns to an unset state.
         */
        if (list_empty(&container->group_list) || container->iommu_driver) {
                up_write(&container->group_lock);
                return -EINVAL;
        }

        mutex_lock(&vfio.iommu_drivers_lock);
        list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) {
                void *data;

#ifdef CONFIG_VFIO_NOIOMMU
                /*
                 * Only noiommu containers can use vfio-noiommu and noiommu
                 * containers can only use vfio-noiommu.
                 */
                if (container->noiommu != (driver->ops == &vfio_noiommu_ops))
                        continue;
#endif

                if (!try_module_get(driver->ops->owner))
                        continue;

                /*
                 * The arg magic for SET_IOMMU is the same as CHECK_EXTENSION,
                 * so test which iommu driver reported support for this
                 * extension and call open on them.  We also pass them the
                 * magic, allowing a single driver to support multiple
                 * interfaces if they'd like.
                 */
                if (driver->ops->ioctl(NULL, VFIO_CHECK_EXTENSION, arg) <= 0) {
                        module_put(driver->ops->owner);
                        continue;
                }

                data = driver->ops->open(arg);
                if (IS_ERR(data)) {
                        ret = PTR_ERR(data);
                        module_put(driver->ops->owner);
                        continue;
                }

                ret = __vfio_container_attach_groups(container, driver, data);
                if (ret) {
                        driver->ops->release(data);
                        module_put(driver->ops->owner);
                        continue;
                }

                container->iommu_driver = driver;
                container->iommu_data = data;
                break;
        }

        mutex_unlock(&vfio.iommu_drivers_lock);
        up_write(&container->group_lock);

        return ret;
}

static long vfio_fops_unl_ioctl(struct file *filep,
                                unsigned int cmd, unsigned long arg)
{
        struct vfio_container *container = filep->private_data;
        struct vfio_iommu_driver *driver;
        void *data;
        long ret = -EINVAL;

        if (!container)
                return ret;

        switch (cmd) {
        case VFIO_GET_API_VERSION:
                ret = VFIO_API_VERSION;
                break;
        case VFIO_CHECK_EXTENSION:
                ret = vfio_ioctl_check_extension(container, arg);
                break;
        case VFIO_SET_IOMMU:
                ret = vfio_ioctl_set_iommu(container, arg);
                break;
        default:
                down_read(&container->group_lock);

                driver = container->iommu_driver;
                data = container->iommu_data;

                if (driver) /* passthrough all unrecognized ioctls */
                        ret = driver->ops->ioctl(data, cmd, arg);

                up_read(&container->group_lock);
        }

        return ret;
}

#ifdef CONFIG_COMPAT
static long vfio_fops_compat_ioctl(struct file *filep,
                                   unsigned int cmd, unsigned long arg)
{
        arg = (unsigned long)compat_ptr(arg);
        return vfio_fops_unl_ioctl(filep, cmd, arg);
}
#endif  /* CONFIG_COMPAT */

static int vfio_fops_open(struct inode *inode, struct file *filep)
{
        struct vfio_container *container;

        container = kzalloc(sizeof(*container), GFP_KERNEL);
        if (!container)
                return -ENOMEM;

        INIT_LIST_HEAD(&container->group_list);
        init_rwsem(&container->group_lock);
        kref_init(&container->kref);

        filep->private_data = container;

        return 0;
}

static int vfio_fops_release(struct inode *inode, struct file *filep)
{
        struct vfio_container *container = filep->private_data;

        filep->private_data = NULL;

        vfio_container_put(container);

        return 0;
}

/*
 * Once an iommu driver is set, we optionally pass read/write/mmap
 * on to the driver, allowing management interfaces beyond ioctl.
 */
static ssize_t vfio_fops_read(struct file *filep, char __user *buf,
                              size_t count, loff_t *ppos)
{
        struct vfio_container *container = filep->private_data;
        struct vfio_iommu_driver *driver;
        ssize_t ret = -EINVAL;

        down_read(&container->group_lock);

        driver = container->iommu_driver;
        if (likely(driver && driver->ops->read))
                ret = driver->ops->read(container->iommu_data,
                                        buf, count, ppos);

        up_read(&container->group_lock);

        return ret;
}

static ssize_t vfio_fops_write(struct file *filep, const char __user *buf,
                               size_t count, loff_t *ppos)
{
        struct vfio_container *container = filep->private_data;
        struct vfio_iommu_driver *driver;
        ssize_t ret = -EINVAL;

        down_read(&container->group_lock);

        driver = container->iommu_driver;
        if (likely(driver && driver->ops->write))
                ret = driver->ops->write(container->iommu_data,
                                         buf, count, ppos);

        up_read(&container->group_lock);

        return ret;
}

static int vfio_fops_mmap(struct file *filep, struct vm_area_struct *vma)
{
        struct vfio_container *container = filep->private_data;
        struct vfio_iommu_driver *driver;
        int ret = -EINVAL;

        down_read(&container->group_lock);

        driver = container->iommu_driver;
        if (likely(driver && driver->ops->mmap))
                ret = driver->ops->mmap(container->iommu_data, vma);

        up_read(&container->group_lock);

        return ret;
}

static const struct file_operations vfio_fops = {
        .owner          = THIS_MODULE,
        .open           = vfio_fops_open,
        .release        = vfio_fops_release,
        .read           = vfio_fops_read,
        .write          = vfio_fops_write,
        .unlocked_ioctl = vfio_fops_unl_ioctl,
#ifdef CONFIG_COMPAT
        .compat_ioctl   = vfio_fops_compat_ioctl,
#endif
        .mmap           = vfio_fops_mmap,
};

/**
 * VFIO Group fd, /dev/vfio/$GROUP
 */
static void __vfio_group_unset_container(struct vfio_group *group)
{
        struct vfio_container *container = group->container;
        struct vfio_iommu_driver *driver;

        down_write(&container->group_lock);

        driver = container->iommu_driver;
        if (driver)
                driver->ops->detach_group(container->iommu_data,
                                          group->iommu_group);

        group->container = NULL;
        list_del(&group->container_next);

        /* Detaching the last group deprivileges a container, remove iommu */
        if (driver && list_empty(&container->group_list)) {
                driver->ops->release(container->iommu_data);
                module_put(driver->ops->owner);
                container->iommu_driver = NULL;
                container->iommu_data = NULL;
        }

        up_write(&container->group_lock);

        vfio_container_put(container);
}

/*
 * VFIO_GROUP_UNSET_CONTAINER should fail if there are other users or
 * if there was no container to unset.  Since the ioctl is called on
 * the group, we know that still exists, therefore the only valid
 * transition here is 1->0.
 */
static int vfio_group_unset_container(struct vfio_group *group)
{
        int users = atomic_cmpxchg(&group->container_users, 1, 0);

        if (!users)
                return -EINVAL;
        if (users != 1)
                return -EBUSY;

        __vfio_group_unset_container(group);

        return 0;
}

/*
 * When removing container users, anything that removes the last user
 * implicitly removes the group from the container.  That is, if the
 * group file descriptor is closed, as well as any device file descriptors,
 * the group is free.
 */
static void vfio_group_try_dissolve_container(struct vfio_group *group)
{
        if (0 == atomic_dec_if_positive(&group->container_users))
                __vfio_group_unset_container(group);
}

static int vfio_group_set_container(struct vfio_group *group, int container_fd)
{
        struct fd f;
        struct vfio_container *container;
        struct vfio_iommu_driver *driver;
        int ret = 0;

        if (atomic_read(&group->container_users))
                return -EINVAL;

        if (group->noiommu && !capable(CAP_SYS_RAWIO))
                return -EPERM;

        f = fdget(container_fd);
        if (!f.file)
                return -EBADF;

        /* Sanity check, is this really our fd? */
        if (f.file->f_op != &vfio_fops) {
                fdput(f);
                return -EINVAL;
        }

        container = f.file->private_data;
        WARN_ON(!container); /* fget ensures we don't race vfio_release */

        down_write(&container->group_lock);

        /* Real groups and fake groups cannot mix */
        if (!list_empty(&container->group_list) &&
            container->noiommu != group->noiommu) {
                ret = -EPERM;
                goto unlock_out;
        }

        driver = container->iommu_driver;
        if (driver) {
                ret = driver->ops->attach_group(container->iommu_data,
                                                group->iommu_group);
                if (ret)
                        goto unlock_out;
        }

        group->container = container;
        container->noiommu = group->noiommu;
        list_add(&group->container_next, &container->group_list);

        /* Get a reference on the container and mark a user within the group */
        vfio_container_get(container);
        atomic_inc(&group->container_users);

unlock_out:
        up_write(&container->group_lock);
        fdput(f);
        return ret;
}

static bool vfio_group_viable(struct vfio_group *group)
{
        return (iommu_group_for_each_dev(group->iommu_group,
                                         group, vfio_dev_viable) == 0);
}

static int vfio_group_add_container_user(struct vfio_group *group)
{
        if (!atomic_inc_not_zero(&group->container_users))
                return -EINVAL;

        if (group->noiommu) {
                atomic_dec(&group->container_users);
                return -EPERM;
        }
        if (!group->container->iommu_driver || !vfio_group_viable(group)) {
                atomic_dec(&group->container_users);
                return -EINVAL;
        }

        return 0;
}

static const struct file_operations vfio_device_fops;

static int vfio_group_get_device_fd(struct vfio_group *group, char *buf)
{
        struct vfio_device *device;
        struct file *filep;
        int ret;

        if (0 == atomic_read(&group->container_users) ||
            !group->container->iommu_driver || !vfio_group_viable(group))
                return -EINVAL;

        if (group->noiommu && !capable(CAP_SYS_RAWIO))
                return -EPERM;

        device = vfio_device_get_from_name(group, buf);
        if (!device)
                return -ENODEV;

        ret = device->ops->open(device->device_data);
        if (ret) {
                vfio_device_put(device);
                return ret;
        }

        /*
         * We can't use anon_inode_getfd() because we need to modify
         * the f_mode flags directly to allow more than just ioctls
         */
        ret = get_unused_fd_flags(O_CLOEXEC);
        if (ret < 0) {
                device->ops->release(device->device_data);
                vfio_device_put(device);
                return ret;
        }

        filep = anon_inode_getfile("[vfio-device]", &vfio_device_fops,
                                   device, O_RDWR);
        if (IS_ERR(filep)) {
                put_unused_fd(ret);
                ret = PTR_ERR(filep);
                device->ops->release(device->device_data);
                vfio_device_put(device);
                return ret;
        }

        /*
         * TODO: add an anon_inode interface to do this.
         * Appears to be missing by lack of need rather than
         * explicitly prevented.  Now there's need.
         */
        filep->f_mode |= (FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE);

        atomic_inc(&group->container_users);

        fd_install(ret, filep);

        if (group->noiommu)
                dev_warn(device->dev, "vfio-noiommu device opened by user "
                         "(%s:%d)\n", current->comm, task_pid_nr(current));

        return ret;
}
1472
1473static long vfio_group_fops_unl_ioctl(struct file *filep,
1474 unsigned int cmd, unsigned long arg)
1475{
1476 struct vfio_group *group = filep->private_data;
1477 long ret = -ENOTTY;
1478
1479 switch (cmd) {
1480 case VFIO_GROUP_GET_STATUS:
1481 {
1482 struct vfio_group_status status;
1483 unsigned long minsz;
1484
1485 minsz = offsetofend(struct vfio_group_status, flags);
1486
1487 if (copy_from_user(&status, (void __user *)arg, minsz))
1488 return -EFAULT;
1489
1490 if (status.argsz < minsz)
1491 return -EINVAL;
1492
1493 status.flags = 0;
1494
1495 if (vfio_group_viable(group))
1496 status.flags |= VFIO_GROUP_FLAGS_VIABLE;
1497
1498 if (group->container)
1499 status.flags |= VFIO_GROUP_FLAGS_CONTAINER_SET;
1500
1501 if (copy_to_user((void __user *)arg, &status, minsz))
1502 return -EFAULT;
1503
1504 ret = 0;
1505 break;
1506 }
1507 case VFIO_GROUP_SET_CONTAINER:
1508 {
1509 int fd;
1510
1511 if (get_user(fd, (int __user *)arg))
1512 return -EFAULT;
1513
1514 if (fd < 0)
1515 return -EINVAL;
1516
1517 ret = vfio_group_set_container(group, fd);
1518 break;
1519 }
1520 case VFIO_GROUP_UNSET_CONTAINER:
1521 ret = vfio_group_unset_container(group);
1522 break;
1523 case VFIO_GROUP_GET_DEVICE_FD:
1524 {
1525 char *buf;
1526
1527 buf = strndup_user((const char __user *)arg, PAGE_SIZE);
1528 if (IS_ERR(buf))
1529 return PTR_ERR(buf);
1530
1531 ret = vfio_group_get_device_fd(group, buf);
1532 kfree(buf);
1533 break;
1534 }
1535 }
1536
1537 return ret;
1538}
1539
1540#ifdef CONFIG_COMPAT
1541static long vfio_group_fops_compat_ioctl(struct file *filep,
1542 unsigned int cmd, unsigned long arg)
1543{
1544 arg = (unsigned long)compat_ptr(arg);
1545 return vfio_group_fops_unl_ioctl(filep, cmd, arg);
1546}
1547#endif /* CONFIG_COMPAT */
1548
1549static int vfio_group_fops_open(struct inode *inode, struct file *filep)
1550{
1551 struct vfio_group *group;
Alex Williamson6d6768c2013-06-25 16:06:54 -06001552 int opened;
Alex Williamsoncba33452012-07-31 08:16:22 -06001553
1554 group = vfio_group_get_from_minor(iminor(inode));
1555 if (!group)
1556 return -ENODEV;
1557
Alex Williamson03a76b62015-12-21 15:13:33 -07001558 if (group->noiommu && !capable(CAP_SYS_RAWIO)) {
1559 vfio_group_put(group);
1560 return -EPERM;
1561 }
1562
Alex Williamson6d6768c2013-06-25 16:06:54 -06001563 /* Do we need multiple instances of the group open? Seems not. */
1564 opened = atomic_cmpxchg(&group->opened, 0, 1);
1565 if (opened) {
1566 vfio_group_put(group);
1567 return -EBUSY;
1568 }
1569
1570 /* Is something still in use from a previous open? */
Alex Williamsoncba33452012-07-31 08:16:22 -06001571 if (group->container) {
Alex Williamson6d6768c2013-06-25 16:06:54 -06001572 atomic_dec(&group->opened);
Alex Williamsoncba33452012-07-31 08:16:22 -06001573 vfio_group_put(group);
1574 return -EBUSY;
1575 }
1576
Alex Williamson65b1ade2017-03-21 13:19:09 -06001577 /* Warn if previous user didn't cleanup and re-init to drop them */
1578 if (WARN_ON(group->notifier.head))
1579 BLOCKING_INIT_NOTIFIER_HEAD(&group->notifier);
1580
Alex Williamsoncba33452012-07-31 08:16:22 -06001581 filep->private_data = group;
1582
1583 return 0;
1584}
1585
1586static int vfio_group_fops_release(struct inode *inode, struct file *filep)
1587{
1588 struct vfio_group *group = filep->private_data;
1589
1590 filep->private_data = NULL;
1591
1592 vfio_group_try_dissolve_container(group);
1593
Alex Williamson6d6768c2013-06-25 16:06:54 -06001594 atomic_dec(&group->opened);
1595
Alex Williamsoncba33452012-07-31 08:16:22 -06001596 vfio_group_put(group);
1597
1598 return 0;
1599}
1600
1601static const struct file_operations vfio_group_fops = {
1602 .owner = THIS_MODULE,
1603 .unlocked_ioctl = vfio_group_fops_unl_ioctl,
1604#ifdef CONFIG_COMPAT
1605 .compat_ioctl = vfio_group_fops_compat_ioctl,
1606#endif
1607 .open = vfio_group_fops_open,
1608 .release = vfio_group_fops_release,
1609};
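
/*
 * For orientation, the userspace side of the group ioctls above follows the
 * sequence documented in Documentation/vfio.txt.  A minimal sketch (the group
 * number "26" and device name "0000:06:0d.0" are illustrative, error handling
 * omitted):
 *
 *	int container = open("/dev/vfio/vfio", O_RDWR);
 *	int group = open("/dev/vfio/26", O_RDWR);
 *	struct vfio_group_status status = { .argsz = sizeof(status) };
 *
 *	ioctl(group, VFIO_GROUP_GET_STATUS, &status);
 *	if (!(status.flags & VFIO_GROUP_FLAGS_VIABLE))
 *		return -1;	// not all group devices bound to vfio drivers
 *
 *	ioctl(group, VFIO_GROUP_SET_CONTAINER, &container);
 *	ioctl(container, VFIO_SET_IOMMU, VFIO_TYPE1_IOMMU);
 *	int device = ioctl(group, VFIO_GROUP_GET_DEVICE_FD, "0000:06:0d.0");
 */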

/**
 * VFIO Device fd
 */
static int vfio_device_fops_release(struct inode *inode, struct file *filep)
{
	struct vfio_device *device = filep->private_data;

	device->ops->release(device->device_data);

	vfio_group_try_dissolve_container(device->group);

	vfio_device_put(device);

	return 0;
}

static long vfio_device_fops_unl_ioctl(struct file *filep,
				       unsigned int cmd, unsigned long arg)
{
	struct vfio_device *device = filep->private_data;

	if (unlikely(!device->ops->ioctl))
		return -EINVAL;

	return device->ops->ioctl(device->device_data, cmd, arg);
}

static ssize_t vfio_device_fops_read(struct file *filep, char __user *buf,
				     size_t count, loff_t *ppos)
{
	struct vfio_device *device = filep->private_data;

	if (unlikely(!device->ops->read))
		return -EINVAL;

	return device->ops->read(device->device_data, buf, count, ppos);
}

static ssize_t vfio_device_fops_write(struct file *filep,
				      const char __user *buf,
				      size_t count, loff_t *ppos)
{
	struct vfio_device *device = filep->private_data;

	if (unlikely(!device->ops->write))
		return -EINVAL;

	return device->ops->write(device->device_data, buf, count, ppos);
}

static int vfio_device_fops_mmap(struct file *filep, struct vm_area_struct *vma)
{
	struct vfio_device *device = filep->private_data;

	if (unlikely(!device->ops->mmap))
		return -EINVAL;

	return device->ops->mmap(device->device_data, vma);
}

#ifdef CONFIG_COMPAT
static long vfio_device_fops_compat_ioctl(struct file *filep,
					  unsigned int cmd, unsigned long arg)
{
	arg = (unsigned long)compat_ptr(arg);
	return vfio_device_fops_unl_ioctl(filep, cmd, arg);
}
#endif	/* CONFIG_COMPAT */

static const struct file_operations vfio_device_fops = {
	.owner		= THIS_MODULE,
	.release	= vfio_device_fops_release,
	.read		= vfio_device_fops_read,
	.write		= vfio_device_fops_write,
	.unlocked_ioctl	= vfio_device_fops_unl_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= vfio_device_fops_compat_ioctl,
#endif
	.mmap		= vfio_device_fops_mmap,
};

/**
 * External user API, exported by symbols to be linked dynamically.
 *
 * The protocol includes:
 * 1. Perform the normal VFIO init operations:
 *	- opening a new container;
 *	- attaching group(s) to it;
 *	- setting an IOMMU driver for the container.
 * When an IOMMU is set for a container, all groups in it are
 * considered ready to use by an external user.
 *
 * 2. User space passes a group fd to an external user.
 * The external user calls vfio_group_get_external_user()
 * to verify that:
 *	- the group is initialized;
 *	- an IOMMU is set for it.
 * If both checks pass, vfio_group_get_external_user()
 * increments the container user counter to prevent
 * the VFIO group from being disposed of before the
 * external user exits.
 *
 * 3. The external user calls vfio_external_user_iommu_id()
 * to obtain the IOMMU group ID.
 *
 * 4. When the external user (e.g. KVM) finishes, it calls
 * vfio_group_put_external_user() to release the VFIO group.
 * This call decrements the container user counter.
 */
struct vfio_group *vfio_group_get_external_user(struct file *filep)
{
	struct vfio_group *group = filep->private_data;
	int ret;

	if (filep->f_op != &vfio_group_fops)
		return ERR_PTR(-EINVAL);

	ret = vfio_group_add_container_user(group);
	if (ret)
		return ERR_PTR(ret);

	vfio_group_get(group);

	return group;
}
EXPORT_SYMBOL_GPL(vfio_group_get_external_user);

void vfio_group_put_external_user(struct vfio_group *group)
{
	vfio_group_try_dissolve_container(group);
	vfio_group_put(group);
}
EXPORT_SYMBOL_GPL(vfio_group_put_external_user);

int vfio_external_user_iommu_id(struct vfio_group *group)
{
	return iommu_group_id(group->iommu_group);
}
EXPORT_SYMBOL_GPL(vfio_external_user_iommu_id);

long vfio_external_check_extension(struct vfio_group *group, unsigned long arg)
{
	return vfio_ioctl_check_extension(group->container, arg);
}
EXPORT_SYMBOL_GPL(vfio_external_check_extension);
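
/*
 * Illustrative only: an in-kernel consumer of the external user API (the
 * kvm-vfio device is the in-tree example) would follow roughly this pattern
 * when handed a group fd by userspace.  This is a sketch, not code from this
 * file:
 *
 *	struct fd f = fdget(group_fd);
 *	struct vfio_group *grp;
 *	int id;
 *
 *	grp = vfio_group_get_external_user(f.file);
 *	fdput(f);
 *	if (IS_ERR(grp))
 *		return PTR_ERR(grp);
 *
 *	id = vfio_external_user_iommu_id(grp);	// wire up the IOMMU group
 *	...
 *	vfio_group_put_external_user(grp);	// done, drop the reference
 */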

/**
 * Sub-module support
 */
/*
 * Helper for managing a buffer of info chain capabilities: allocate or
 * reallocate a buffer with additional @size, filling in @id and @version
 * of the capability.  A pointer to the new capability is returned.
 *
 * NB. The chain is based at the head of the buffer, so new entries are
 * added to the tail; vfio_info_cap_shift() should be called to fix up the
 * next offsets prior to copying to the user buffer.
 */
struct vfio_info_cap_header *vfio_info_cap_add(struct vfio_info_cap *caps,
					       size_t size, u16 id, u16 version)
{
	void *buf;
	struct vfio_info_cap_header *header, *tmp;

	buf = krealloc(caps->buf, caps->size + size, GFP_KERNEL);
	if (!buf) {
		kfree(caps->buf);
		caps->size = 0;
		return ERR_PTR(-ENOMEM);
	}

	caps->buf = buf;
	header = buf + caps->size;

	/* Eventually copied to user buffer, zero */
	memset(header, 0, size);

	header->id = id;
	header->version = version;

	/* Add to the end of the capability chain */
	for (tmp = buf; tmp->next; tmp = buf + tmp->next)
		; /* nothing */

	tmp->next = caps->size;
	caps->size += size;

	return header;
}
EXPORT_SYMBOL_GPL(vfio_info_cap_add);

void vfio_info_cap_shift(struct vfio_info_cap *caps, size_t offset)
{
	struct vfio_info_cap_header *tmp;
	void *buf = (void *)caps->buf;

	for (tmp = buf; tmp->next; tmp = buf + tmp->next - offset)
		tmp->next += offset;
}
EXPORT_SYMBOL(vfio_info_cap_shift);

static int sparse_mmap_cap(struct vfio_info_cap *caps, void *cap_type)
{
	struct vfio_info_cap_header *header;
	struct vfio_region_info_cap_sparse_mmap *sparse_cap, *sparse = cap_type;
	size_t size;

	size = sizeof(*sparse) + sparse->nr_areas * sizeof(*sparse->areas);
	header = vfio_info_cap_add(caps, size,
				   VFIO_REGION_INFO_CAP_SPARSE_MMAP, 1);
	if (IS_ERR(header))
		return PTR_ERR(header);

	sparse_cap = container_of(header,
			struct vfio_region_info_cap_sparse_mmap, header);
	sparse_cap->nr_areas = sparse->nr_areas;
	memcpy(sparse_cap->areas, sparse->areas,
	       sparse->nr_areas * sizeof(*sparse->areas));
	return 0;
}

static int region_type_cap(struct vfio_info_cap *caps, void *cap_type)
{
	struct vfio_info_cap_header *header;
	struct vfio_region_info_cap_type *type_cap, *cap = cap_type;

	header = vfio_info_cap_add(caps, sizeof(*cap),
				   VFIO_REGION_INFO_CAP_TYPE, 1);
	if (IS_ERR(header))
		return PTR_ERR(header);

	type_cap = container_of(header, struct vfio_region_info_cap_type,
				header);
	type_cap->type = cap->type;
	type_cap->subtype = cap->subtype;
	return 0;
}

int vfio_info_add_capability(struct vfio_info_cap *caps, int cap_type_id,
			     void *cap_type)
{
	int ret = -EINVAL;

	if (!cap_type)
		return 0;

	switch (cap_type_id) {
	case VFIO_REGION_INFO_CAP_SPARSE_MMAP:
		ret = sparse_mmap_cap(caps, cap_type);
		break;

	case VFIO_REGION_INFO_CAP_TYPE:
		ret = region_type_cap(caps, cap_type);
		break;
	}

	return ret;
}
EXPORT_SYMBOL(vfio_info_add_capability);
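
/*
 * A rough sketch of how a bus/vendor driver's REGION_INFO handler is expected
 * to use the capability helpers above.  Assumes a zero-initialized
 * struct vfio_info_cap caps = { .buf = NULL, .size = 0 } and a "sparse"
 * structure filled from driver state; error handling elided:
 *
 *	ret = vfio_info_add_capability(&caps, VFIO_REGION_INFO_CAP_SPARSE_MMAP,
 *				       sparse);
 *	if (!ret && caps.size) {
 *		info.flags |= VFIO_REGION_INFO_FLAG_CAPS;
 *		info.cap_offset = sizeof(info);
 *		// chain offsets were buffer-relative; rebase for the user copy
 *		vfio_info_cap_shift(&caps, sizeof(info));
 *		copy_to_user((void __user *)arg + sizeof(info),
 *			     caps.buf, caps.size);
 *		kfree(caps.buf);
 *	}
 */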

int vfio_set_irqs_validate_and_prepare(struct vfio_irq_set *hdr, int num_irqs,
				       int max_irq_type, size_t *data_size)
{
	unsigned long minsz;
	size_t size;

	minsz = offsetofend(struct vfio_irq_set, count);

	if ((hdr->argsz < minsz) || (hdr->index >= max_irq_type) ||
	    (hdr->count >= (U32_MAX - hdr->start)) ||
	    (hdr->flags & ~(VFIO_IRQ_SET_DATA_TYPE_MASK |
			    VFIO_IRQ_SET_ACTION_TYPE_MASK)))
		return -EINVAL;

	if (data_size)
		*data_size = 0;

	if (hdr->start >= num_irqs || hdr->start + hdr->count > num_irqs)
		return -EINVAL;

	switch (hdr->flags & VFIO_IRQ_SET_DATA_TYPE_MASK) {
	case VFIO_IRQ_SET_DATA_NONE:
		size = 0;
		break;
	case VFIO_IRQ_SET_DATA_BOOL:
		size = sizeof(uint8_t);
		break;
	case VFIO_IRQ_SET_DATA_EVENTFD:
		size = sizeof(int32_t);
		break;
	default:
		return -EINVAL;
	}

	if (size) {
		if (hdr->argsz - minsz < hdr->count * size)
			return -EINVAL;

		if (!data_size)
			return -EINVAL;

		*data_size = hdr->count * size;
	}

	return 0;
}
EXPORT_SYMBOL(vfio_set_irqs_validate_and_prepare);
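
/*
 * Sketch of the intended caller pattern for the validation helper above, as
 * seen from a driver's VFIO_DEVICE_SET_IRQS ioctl path.  Identifiers such as
 * "my_num_irqs" stand in for driver-specific values; this is not code from
 * this file:
 *
 *	struct vfio_irq_set hdr;
 *	size_t data_size = 0;
 *	u8 *data = NULL;
 *
 *	if (copy_from_user(&hdr, (void __user *)arg, minsz))
 *		return -EFAULT;
 *
 *	ret = vfio_set_irqs_validate_and_prepare(&hdr, my_num_irqs,
 *						 VFIO_PCI_NUM_IRQS, &data_size);
 *	if (ret)
 *		return ret;
 *
 *	if (data_size) {
 *		data = memdup_user((void __user *)(arg + minsz), data_size);
 *		if (IS_ERR(data))
 *			return PTR_ERR(data);
 *	}
 */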

/*
 * Pin a set of guest PFNs and return their associated host PFNs for local
 * domain only.
 * @dev [in]     : device
 * @user_pfn [in]: array of user/guest PFNs to be pinned.
 * @npage [in]   : count of elements in user_pfn array.  This count should not
 *		   be greater than VFIO_PIN_PAGES_MAX_ENTRIES.
 * @prot [in]    : protection flags
 * @phys_pfn[out]: array of host PFNs
 * Return error or number of pages pinned.
 */
int vfio_pin_pages(struct device *dev, unsigned long *user_pfn, int npage,
		   int prot, unsigned long *phys_pfn)
{
	struct vfio_container *container;
	struct vfio_group *group;
	struct vfio_iommu_driver *driver;
	int ret;

	if (!dev || !user_pfn || !phys_pfn || !npage)
		return -EINVAL;

	if (npage > VFIO_PIN_PAGES_MAX_ENTRIES)
		return -E2BIG;

	group = vfio_group_get_from_dev(dev);
	if (!group)
		return -ENODEV;

	ret = vfio_group_add_container_user(group);
	if (ret)
		goto err_pin_pages;

	container = group->container;
	down_read(&container->group_lock);

	driver = container->iommu_driver;
	if (likely(driver && driver->ops->pin_pages))
		ret = driver->ops->pin_pages(container->iommu_data, user_pfn,
					     npage, prot, phys_pfn);
	else
		ret = -ENOTTY;

	up_read(&container->group_lock);
	vfio_group_try_dissolve_container(group);

err_pin_pages:
	vfio_group_put(group);
	return ret;
}
EXPORT_SYMBOL(vfio_pin_pages);

/*
 * Unpin a set of host PFNs for local domain only.
 * @dev [in]     : device
 * @user_pfn [in]: array of user/guest PFNs to be unpinned.  Number of
 *		   user/guest PFNs should not be greater than
 *		   VFIO_PIN_PAGES_MAX_ENTRIES.
 * @npage [in]   : count of elements in user_pfn array.  This count should not
 *		   be greater than VFIO_PIN_PAGES_MAX_ENTRIES.
 * Return error or number of pages unpinned.
 */
int vfio_unpin_pages(struct device *dev, unsigned long *user_pfn, int npage)
{
	struct vfio_container *container;
	struct vfio_group *group;
	struct vfio_iommu_driver *driver;
	int ret;

	if (!dev || !user_pfn || !npage)
		return -EINVAL;

	if (npage > VFIO_PIN_PAGES_MAX_ENTRIES)
		return -E2BIG;

	group = vfio_group_get_from_dev(dev);
	if (!group)
		return -ENODEV;

	ret = vfio_group_add_container_user(group);
	if (ret)
		goto err_unpin_pages;

	container = group->container;
	down_read(&container->group_lock);

	driver = container->iommu_driver;
	if (likely(driver && driver->ops->unpin_pages))
		ret = driver->ops->unpin_pages(container->iommu_data, user_pfn,
					       npage);
	else
		ret = -ENOTTY;

	up_read(&container->group_lock);
	vfio_group_try_dissolve_container(group);

err_unpin_pages:
	vfio_group_put(group);
	return ret;
}
EXPORT_SYMBOL(vfio_unpin_pages);
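
/*
 * Usage sketch for a mediated-device vendor driver translating a guest DMA
 * request.  The gpa value and the mdev_dev() accessor are assumed to come
 * from the mdev framework; this is illustrative, not code from this file:
 *
 *	unsigned long gfn[1] = { gpa >> PAGE_SHIFT };
 *	unsigned long hpfn[1];
 *	int ret;
 *
 *	ret = vfio_pin_pages(mdev_dev(mdev), gfn, 1,
 *			     IOMMU_READ | IOMMU_WRITE, hpfn);
 *	if (ret != 1)
 *		return ret < 0 ? ret : -EFAULT;
 *
 *	// ... program the device using hpfn[0] as needed ...
 *
 *	vfio_unpin_pages(mdev_dev(mdev), gfn, 1);
 */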

static int vfio_register_iommu_notifier(struct vfio_group *group,
					unsigned long *events,
					struct notifier_block *nb)
{
	struct vfio_container *container;
	struct vfio_iommu_driver *driver;
	int ret;

	ret = vfio_group_add_container_user(group);
	if (ret)
		return -EINVAL;

	container = group->container;
	down_read(&container->group_lock);

	driver = container->iommu_driver;
	if (likely(driver && driver->ops->register_notifier))
		ret = driver->ops->register_notifier(container->iommu_data,
						     events, nb);
	else
		ret = -ENOTTY;

	up_read(&container->group_lock);
	vfio_group_try_dissolve_container(group);

	return ret;
}

static int vfio_unregister_iommu_notifier(struct vfio_group *group,
					  struct notifier_block *nb)
{
	struct vfio_container *container;
	struct vfio_iommu_driver *driver;
	int ret;

	ret = vfio_group_add_container_user(group);
	if (ret)
		return -EINVAL;

	container = group->container;
	down_read(&container->group_lock);

	driver = container->iommu_driver;
	if (likely(driver && driver->ops->unregister_notifier))
		ret = driver->ops->unregister_notifier(container->iommu_data,
						       nb);
	else
		ret = -ENOTTY;

	up_read(&container->group_lock);
	vfio_group_try_dissolve_container(group);

	return ret;
}

void vfio_group_set_kvm(struct vfio_group *group, struct kvm *kvm)
{
	group->kvm = kvm;
	blocking_notifier_call_chain(&group->notifier,
				VFIO_GROUP_NOTIFY_SET_KVM, kvm);
}
EXPORT_SYMBOL_GPL(vfio_group_set_kvm);
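
/*
 * For context: the in-tree caller of vfio_group_set_kvm() is the kvm-vfio
 * pseudo device (virt/kvm/vfio.c), which resolves the symbol at runtime so
 * KVM need not link against VFIO.  Roughly (a simplified sketch of that
 * file, not code from this one):
 *
 *	void (*fn)(struct vfio_group *, struct kvm *);
 *
 *	fn = symbol_get(vfio_group_set_kvm);
 *	if (!fn)
 *		return;
 *	fn(group, kvm);	// or NULL on teardown
 *	symbol_put(vfio_group_set_kvm);
 */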

static int vfio_register_group_notifier(struct vfio_group *group,
					unsigned long *events,
					struct notifier_block *nb)
{
	struct vfio_container *container;
	int ret;
	bool set_kvm = false;

	if (*events & VFIO_GROUP_NOTIFY_SET_KVM)
		set_kvm = true;

	/* clear known events */
	*events &= ~VFIO_GROUP_NOTIFY_SET_KVM;

	/* refuse to continue if any unknown events remain */
	if (*events)
		return -EINVAL;

	ret = vfio_group_add_container_user(group);
	if (ret)
		return -EINVAL;

	container = group->container;
	down_read(&container->group_lock);

	ret = blocking_notifier_chain_register(&group->notifier, nb);

	/*
	 * The kvm and vfio_group may already be attached, so replay the
	 * event once upon registration.
	 */
	if (!ret && set_kvm && group->kvm)
		blocking_notifier_call_chain(&group->notifier,
					VFIO_GROUP_NOTIFY_SET_KVM, group->kvm);

	up_read(&container->group_lock);
	vfio_group_try_dissolve_container(group);

	return ret;
}

static int vfio_unregister_group_notifier(struct vfio_group *group,
					  struct notifier_block *nb)
{
	struct vfio_container *container;
	int ret;

	ret = vfio_group_add_container_user(group);
	if (ret)
		return -EINVAL;

	container = group->container;
	down_read(&container->group_lock);

	ret = blocking_notifier_chain_unregister(&group->notifier, nb);

	up_read(&container->group_lock);
	vfio_group_try_dissolve_container(group);

	return ret;
}

int vfio_register_notifier(struct device *dev, enum vfio_notify_type type,
			   unsigned long *events, struct notifier_block *nb)
{
	struct vfio_group *group;
	int ret;

	if (!dev || !nb || !events || (*events == 0))
		return -EINVAL;

	group = vfio_group_get_from_dev(dev);
	if (!group)
		return -ENODEV;

	switch (type) {
	case VFIO_IOMMU_NOTIFY:
		ret = vfio_register_iommu_notifier(group, events, nb);
		break;
	case VFIO_GROUP_NOTIFY:
		ret = vfio_register_group_notifier(group, events, nb);
		break;
	default:
		ret = -EINVAL;
	}

	vfio_group_put(group);
	return ret;
}
EXPORT_SYMBOL(vfio_register_notifier);

int vfio_unregister_notifier(struct device *dev, enum vfio_notify_type type,
			     struct notifier_block *nb)
{
	struct vfio_group *group;
	int ret;

	if (!dev || !nb)
		return -EINVAL;

	group = vfio_group_get_from_dev(dev);
	if (!group)
		return -ENODEV;

	switch (type) {
	case VFIO_IOMMU_NOTIFY:
		ret = vfio_unregister_iommu_notifier(group, nb);
		break;
	case VFIO_GROUP_NOTIFY:
		ret = vfio_unregister_group_notifier(group, nb);
		break;
	default:
		ret = -EINVAL;
	}

	vfio_group_put(group);
	return ret;
}
EXPORT_SYMBOL(vfio_unregister_notifier);
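
/*
 * Registration sketch for a vendor driver that wants DMA unmap callbacks
 * from the IOMMU backend (the notifier function body here is hypothetical):
 *
 *	static int my_iommu_notifier(struct notifier_block *nb,
 *				     unsigned long action, void *data)
 *	{
 *		if (action == VFIO_IOMMU_NOTIFY_DMA_UNMAP) {
 *			struct vfio_iommu_type1_dma_unmap *unmap = data;
 *			// invalidate pinned pages in [iova, iova + size)
 *		}
 *		return NOTIFY_OK;
 *	}
 *
 *	unsigned long events = VFIO_IOMMU_NOTIFY_DMA_UNMAP;
 *
 *	nb.notifier_call = my_iommu_notifier;
 *	ret = vfio_register_notifier(dev, VFIO_IOMMU_NOTIFY, &events, &nb);
 *	...
 *	vfio_unregister_notifier(dev, VFIO_IOMMU_NOTIFY, &nb);
 */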

/**
 * Module/class support
 */
static char *vfio_devnode(struct device *dev, umode_t *mode)
{
	return kasprintf(GFP_KERNEL, "vfio/%s", dev_name(dev));
}

static struct miscdevice vfio_dev = {
	.minor = VFIO_MINOR,
	.name = "vfio",
	.fops = &vfio_fops,
	.nodename = "vfio/vfio",
	.mode = S_IRUGO | S_IWUGO,
};

static int __init vfio_init(void)
{
	int ret;

	idr_init(&vfio.group_idr);
	mutex_init(&vfio.group_lock);
	mutex_init(&vfio.iommu_drivers_lock);
	INIT_LIST_HEAD(&vfio.group_list);
	INIT_LIST_HEAD(&vfio.iommu_drivers_list);
	init_waitqueue_head(&vfio.release_q);

	ret = misc_register(&vfio_dev);
	if (ret) {
		pr_err("vfio: misc device register failed\n");
		return ret;
	}

	/* /dev/vfio/$GROUP */
	vfio.class = class_create(THIS_MODULE, "vfio");
	if (IS_ERR(vfio.class)) {
		ret = PTR_ERR(vfio.class);
		goto err_class;
	}

	vfio.class->devnode = vfio_devnode;

	ret = alloc_chrdev_region(&vfio.group_devt, 0, MINORMASK, "vfio");
	if (ret)
		goto err_alloc_chrdev;

	cdev_init(&vfio.group_cdev, &vfio_group_fops);
	ret = cdev_add(&vfio.group_cdev, vfio.group_devt, MINORMASK);
	if (ret)
		goto err_cdev_add;

	pr_info(DRIVER_DESC " version: " DRIVER_VERSION "\n");

#ifdef CONFIG_VFIO_NOIOMMU
	vfio_register_iommu_driver(&vfio_noiommu_ops);
#endif
	return 0;

err_cdev_add:
	unregister_chrdev_region(vfio.group_devt, MINORMASK);
err_alloc_chrdev:
	class_destroy(vfio.class);
	vfio.class = NULL;
err_class:
	misc_deregister(&vfio_dev);
	return ret;
}

static void __exit vfio_cleanup(void)
{
	WARN_ON(!list_empty(&vfio.group_list));

#ifdef CONFIG_VFIO_NOIOMMU
	vfio_unregister_iommu_driver(&vfio_noiommu_ops);
#endif
	idr_destroy(&vfio.group_idr);
	cdev_del(&vfio.group_cdev);
	unregister_chrdev_region(vfio.group_devt, MINORMASK);
	class_destroy(vfio.class);
	vfio.class = NULL;
	misc_deregister(&vfio_dev);
}

module_init(vfio_init);
module_exit(vfio_cleanup);

MODULE_VERSION(DRIVER_VERSION);
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR(DRIVER_AUTHOR);
MODULE_DESCRIPTION(DRIVER_DESC);
MODULE_ALIAS_MISCDEV(VFIO_MINOR);
MODULE_ALIAS("devname:vfio/vfio");
MODULE_SOFTDEP("post: vfio_iommu_type1 vfio_iommu_spapr_tce");