blob: c0fc774221ca8f2932500490d5744b012f100124 [file] [log] [blame]
Stefano Stabellini72e59c32017-07-05 13:08:39 -07001/*
2 * (c) 2017 Stefano Stabellini <stefano@aporeto.com>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 */
14
Stefano Stabellinifb029872017-07-06 11:01:00 -070015#include <linux/inet.h>
Stefano Stabellini72e59c32017-07-05 13:08:39 -070016#include <linux/kthread.h>
17#include <linux/list.h>
18#include <linux/radix-tree.h>
19#include <linux/module.h>
20#include <linux/semaphore.h>
21#include <linux/wait.h>
Stefano Stabellinifb029872017-07-06 11:01:00 -070022#include <net/sock.h>
23#include <net/inet_common.h>
24#include <net/inet_connection_sock.h>
25#include <net/request_sock.h>
Stefano Stabellini72e59c32017-07-05 13:08:39 -070026
27#include <xen/events.h>
28#include <xen/grant_table.h>
29#include <xen/xen.h>
30#include <xen/xenbus.h>
31#include <xen/interface/io/pvcalls.h>
32
Stefano Stabellini0a9c75c2017-07-06 10:59:17 -070033#define PVCALLS_VERSIONS "1"
34#define MAX_RING_ORDER XENBUS_MAX_RING_GRANT_ORDER
35
/*
 * Global driver state: the list of all connected frontends.
 * frontends_lock protects the list (taken in backend_connect() and in
 * set_backend_state() around backend_disconnect()).
 */
struct pvcalls_back_global {
	struct list_head frontends;
	struct semaphore frontends_lock;
} pvcalls_back_global;
40
/*
 * Per-frontend data structure. It contains pointers to the command
 * ring, its event channel, a list of active sockets and a tree of
 * passive sockets.
 */
struct pvcalls_fedata {
	struct list_head list;			/* entry in pvcalls_back_global.frontends */
	struct xenbus_device *dev;		/* the xenbus device for this frontend */
	struct xen_pvcalls_sring *sring;	/* mapped shared command ring page */
	struct xen_pvcalls_back_ring ring;	/* backend view of the command ring */
	int irq;				/* irq bound to the command evtchn */
	struct list_head socket_mappings;	/* active sockets (struct sock_mapping) */
	struct radix_tree_root socketpass_mappings; /* passive sockets, keyed by id */
	/* protects socket_mappings and socketpass_mappings */
	struct semaphore socket_lock;
};
56
/* Per-socket I/O worker: a work item and the workqueue it runs on. */
struct pvcalls_ioworker {
	struct work_struct register_work;
	struct workqueue_struct *wq;
};
61
/*
 * State for one active socket shared with the frontend: the mapped
 * data ring, its event channel/irq, and the I/O worker servicing it.
 */
struct sock_mapping {
	struct list_head list;		/* entry in fedata->socket_mappings */
	struct pvcalls_fedata *fedata;	/* owning frontend */
	struct socket *sock;		/* backing kernel socket */
	uint64_t id;			/* frontend-chosen socket id */
	grant_ref_t ref;		/* grant ref of the indirect ring page */
	struct pvcalls_data_intf *ring;	/* mapped indirect page (indexes + refs) */
	void *bytes;			/* mapped data ring pages */
	struct pvcalls_data data;	/* in/out pointers into @bytes */
	uint32_t ring_order;		/* data ring spans 1 << ring_order pages */
	int irq;			/* irq bound to the per-socket evtchn */
	atomic_t read;
	atomic_t write;
	atomic_t io;
	atomic_t release;
	/* original sk_data_ready, saved before installing our callback */
	void (*saved_data_ready)(struct sock *sk);
	struct pvcalls_ioworker ioworker;
};
80
81static irqreturn_t pvcalls_back_conn_event(int irq, void *sock_map);
82static int pvcalls_back_release_active(struct xenbus_device *dev,
83 struct pvcalls_fedata *fedata,
84 struct sock_mapping *map);
85
/* Work item servicing per-socket I/O; currently an empty stub. */
static void pvcalls_back_ioworker(struct work_struct *work)
{
}
89
Stefano Stabellinib1efa692017-07-06 11:00:00 -070090static int pvcalls_back_socket(struct xenbus_device *dev,
91 struct xen_pvcalls_request *req)
92{
Stefano Stabellinifb029872017-07-06 11:01:00 -070093 struct pvcalls_fedata *fedata;
94 int ret;
95 struct xen_pvcalls_response *rsp;
96
97 fedata = dev_get_drvdata(&dev->dev);
98
99 if (req->u.socket.domain != AF_INET ||
100 req->u.socket.type != SOCK_STREAM ||
101 (req->u.socket.protocol != IPPROTO_IP &&
102 req->u.socket.protocol != AF_INET))
103 ret = -EAFNOSUPPORT;
104 else
105 ret = 0;
106
107 /* leave the actual socket allocation for later */
108
109 rsp = RING_GET_RESPONSE(&fedata->ring, fedata->ring.rsp_prod_pvt++);
110 rsp->req_id = req->req_id;
111 rsp->cmd = req->cmd;
112 rsp->u.socket.id = req->u.socket.id;
113 rsp->ret = ret;
114
Stefano Stabellinib1efa692017-07-06 11:00:00 -0700115 return 0;
116}
117
/*
 * Replacement sk_state_change callback, installed by
 * pvcalls_new_active_socket(). Propagates a state change to the
 * frontend by flagging an error on the data ring and kicking the
 * per-socket event channel.
 */
static void pvcalls_sk_state_change(struct sock *sock)
{
	struct sock_mapping *map = sock->sk_user_data;
	struct pvcalls_data_intf *intf;

	/* sk_user_data may already be cleared during teardown */
	if (map == NULL)
		return;

	intf = map->ring;
	intf->in_error = -ENOTCONN;
	notify_remote_via_irq(map->irq);
}
130
/* Replacement sk_data_ready callback; currently an empty stub. */
static void pvcalls_sk_data_ready(struct sock *sock)
{
}
134
135static struct sock_mapping *pvcalls_new_active_socket(
136 struct pvcalls_fedata *fedata,
137 uint64_t id,
138 grant_ref_t ref,
139 uint32_t evtchn,
140 struct socket *sock)
141{
142 int ret;
143 struct sock_mapping *map;
144 void *page;
145
146 map = kzalloc(sizeof(*map), GFP_KERNEL);
147 if (map == NULL)
148 return NULL;
149
150 map->fedata = fedata;
151 map->sock = sock;
152 map->id = id;
153 map->ref = ref;
154
155 ret = xenbus_map_ring_valloc(fedata->dev, &ref, 1, &page);
156 if (ret < 0)
157 goto out;
158 map->ring = page;
159 map->ring_order = map->ring->ring_order;
160 /* first read the order, then map the data ring */
161 virt_rmb();
162 if (map->ring_order > MAX_RING_ORDER) {
163 pr_warn("%s frontend requested ring_order %u, which is > MAX (%u)\n",
164 __func__, map->ring_order, MAX_RING_ORDER);
165 goto out;
166 }
167 ret = xenbus_map_ring_valloc(fedata->dev, map->ring->ref,
168 (1 << map->ring_order), &page);
169 if (ret < 0)
170 goto out;
171 map->bytes = page;
172
173 ret = bind_interdomain_evtchn_to_irqhandler(fedata->dev->otherend_id,
174 evtchn,
175 pvcalls_back_conn_event,
176 0,
177 "pvcalls-backend",
178 map);
179 if (ret < 0)
180 goto out;
181 map->irq = ret;
182
183 map->data.in = map->bytes;
184 map->data.out = map->bytes + XEN_FLEX_RING_SIZE(map->ring_order);
185
186 map->ioworker.wq = alloc_workqueue("pvcalls_io", WQ_UNBOUND, 1);
187 if (!map->ioworker.wq)
188 goto out;
189 atomic_set(&map->io, 1);
190 INIT_WORK(&map->ioworker.register_work, pvcalls_back_ioworker);
191
192 down(&fedata->socket_lock);
193 list_add_tail(&map->list, &fedata->socket_mappings);
194 up(&fedata->socket_lock);
195
196 write_lock_bh(&map->sock->sk->sk_callback_lock);
197 map->saved_data_ready = map->sock->sk->sk_data_ready;
198 map->sock->sk->sk_user_data = map;
199 map->sock->sk->sk_data_ready = pvcalls_sk_data_ready;
200 map->sock->sk->sk_state_change = pvcalls_sk_state_change;
201 write_unlock_bh(&map->sock->sk->sk_callback_lock);
202
203 return map;
204out:
205 down(&fedata->socket_lock);
206 list_del(&map->list);
207 pvcalls_back_release_active(fedata->dev, fedata, map);
208 up(&fedata->socket_lock);
209 return NULL;
210}
211
Stefano Stabellinib1efa692017-07-06 11:00:00 -0700212static int pvcalls_back_connect(struct xenbus_device *dev,
213 struct xen_pvcalls_request *req)
214{
Stefano Stabellini5db4d282017-07-06 11:01:06 -0700215 struct pvcalls_fedata *fedata;
216 int ret = -EINVAL;
217 struct socket *sock;
218 struct sock_mapping *map;
219 struct xen_pvcalls_response *rsp;
220 struct sockaddr *sa = (struct sockaddr *)&req->u.connect.addr;
221
222 fedata = dev_get_drvdata(&dev->dev);
223
224 if (req->u.connect.len < sizeof(sa->sa_family) ||
225 req->u.connect.len > sizeof(req->u.connect.addr) ||
226 sa->sa_family != AF_INET)
227 goto out;
228
229 ret = sock_create(AF_INET, SOCK_STREAM, 0, &sock);
230 if (ret < 0)
231 goto out;
232 ret = inet_stream_connect(sock, sa, req->u.connect.len, 0);
233 if (ret < 0) {
234 sock_release(sock);
235 goto out;
236 }
237
238 map = pvcalls_new_active_socket(fedata,
239 req->u.connect.id,
240 req->u.connect.ref,
241 req->u.connect.evtchn,
242 sock);
243 if (!map) {
244 ret = -EFAULT;
245 sock_release(map->sock);
246 }
247
248out:
249 rsp = RING_GET_RESPONSE(&fedata->ring, fedata->ring.rsp_prod_pvt++);
250 rsp->req_id = req->req_id;
251 rsp->cmd = req->cmd;
252 rsp->u.connect.id = req->u.connect.id;
253 rsp->ret = ret;
254
255 return 0;
256}
257
/* Tear down an active socket mapping; currently a stub returning 0. */
static int pvcalls_back_release_active(struct xenbus_device *dev,
				       struct pvcalls_fedata *fedata,
				       struct sock_mapping *map)
{
	return 0;
}
264
/* Handle PVCALLS_RELEASE; currently a stub that reports success. */
static int pvcalls_back_release(struct xenbus_device *dev,
				struct xen_pvcalls_request *req)
{
	return 0;
}
270
/* Handle PVCALLS_BIND; currently a stub that reports success. */
static int pvcalls_back_bind(struct xenbus_device *dev,
			     struct xen_pvcalls_request *req)
{
	return 0;
}
276
/* Handle PVCALLS_LISTEN; currently a stub that reports success. */
static int pvcalls_back_listen(struct xenbus_device *dev,
			       struct xen_pvcalls_request *req)
{
	return 0;
}
282
/* Handle PVCALLS_ACCEPT; currently a stub that reports success. */
static int pvcalls_back_accept(struct xenbus_device *dev,
			       struct xen_pvcalls_request *req)
{
	return 0;
}
288
/* Handle PVCALLS_POLL; currently a stub that reports success. */
static int pvcalls_back_poll(struct xenbus_device *dev,
			     struct xen_pvcalls_request *req)
{
	return 0;
}
294
295static int pvcalls_back_handle_cmd(struct xenbus_device *dev,
296 struct xen_pvcalls_request *req)
297{
298 int ret = 0;
299
300 switch (req->cmd) {
301 case PVCALLS_SOCKET:
302 ret = pvcalls_back_socket(dev, req);
303 break;
304 case PVCALLS_CONNECT:
305 ret = pvcalls_back_connect(dev, req);
306 break;
307 case PVCALLS_RELEASE:
308 ret = pvcalls_back_release(dev, req);
309 break;
310 case PVCALLS_BIND:
311 ret = pvcalls_back_bind(dev, req);
312 break;
313 case PVCALLS_LISTEN:
314 ret = pvcalls_back_listen(dev, req);
315 break;
316 case PVCALLS_ACCEPT:
317 ret = pvcalls_back_accept(dev, req);
318 break;
319 case PVCALLS_POLL:
320 ret = pvcalls_back_poll(dev, req);
321 break;
322 default:
323 {
324 struct pvcalls_fedata *fedata;
325 struct xen_pvcalls_response *rsp;
326
327 fedata = dev_get_drvdata(&dev->dev);
328 rsp = RING_GET_RESPONSE(
329 &fedata->ring, fedata->ring.rsp_prod_pvt++);
330 rsp->req_id = req->req_id;
331 rsp->cmd = req->cmd;
332 rsp->ret = -ENOTSUPP;
333 break;
334 }
335 }
336 return ret;
337}
338
/*
 * Drain the command ring: copy out each request, dispatch it, and push
 * the response.  A response is pushed only when
 * pvcalls_back_handle_cmd() returned 0.  The frontend is kicked once
 * per batch rather than per response, and
 * RING_FINAL_CHECK_FOR_REQUESTS() re-checks for requests that raced in
 * after the inner loop drained the ring, so none are lost.
 */
static void pvcalls_back_work(struct pvcalls_fedata *fedata)
{
	int notify, notify_all = 0, more = 1;
	struct xen_pvcalls_request req;
	struct xenbus_device *dev = fedata->dev;

	while (more) {
		while (RING_HAS_UNCONSUMED_REQUESTS(&fedata->ring)) {
			/* copy, don't alias: the frontend can rewrite the ring */
			RING_COPY_REQUEST(&fedata->ring,
					  fedata->ring.req_cons++,
					  &req);

			if (!pvcalls_back_handle_cmd(dev, &req)) {
				RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(
					&fedata->ring, notify);
				notify_all += notify;
			}
		}

		/* one notification per batch of responses */
		if (notify_all) {
			notify_remote_via_irq(fedata->irq);
			notify_all = 0;
		}

		RING_FINAL_CHECK_FOR_REQUESTS(&fedata->ring, more);
	}
}
366
Stefano Stabellinid0e4d562017-07-06 10:59:29 -0700367static irqreturn_t pvcalls_back_event(int irq, void *dev_id)
368{
Stefano Stabellinib1efa692017-07-06 11:00:00 -0700369 struct xenbus_device *dev = dev_id;
370 struct pvcalls_fedata *fedata = NULL;
371
372 if (dev == NULL)
373 return IRQ_HANDLED;
374
375 fedata = dev_get_drvdata(&dev->dev);
376 if (fedata == NULL)
377 return IRQ_HANDLED;
378
379 pvcalls_back_work(fedata);
Stefano Stabellinid0e4d562017-07-06 10:59:29 -0700380 return IRQ_HANDLED;
381}
382
/* Per-socket event channel handler; currently a stub. */
static irqreturn_t pvcalls_back_conn_event(int irq, void *sock_map)
{
	return IRQ_HANDLED;
}
387
Stefano Stabellini0a9c75c2017-07-06 10:59:17 -0700388static int backend_connect(struct xenbus_device *dev)
389{
Stefano Stabellinid0e4d562017-07-06 10:59:29 -0700390 int err, evtchn;
391 grant_ref_t ring_ref;
392 struct pvcalls_fedata *fedata = NULL;
393
394 fedata = kzalloc(sizeof(struct pvcalls_fedata), GFP_KERNEL);
395 if (!fedata)
396 return -ENOMEM;
397
398 fedata->irq = -1;
399 err = xenbus_scanf(XBT_NIL, dev->otherend, "port", "%u",
400 &evtchn);
401 if (err != 1) {
402 err = -EINVAL;
403 xenbus_dev_fatal(dev, err, "reading %s/event-channel",
404 dev->otherend);
405 goto error;
406 }
407
408 err = xenbus_scanf(XBT_NIL, dev->otherend, "ring-ref", "%u", &ring_ref);
409 if (err != 1) {
410 err = -EINVAL;
411 xenbus_dev_fatal(dev, err, "reading %s/ring-ref",
412 dev->otherend);
413 goto error;
414 }
415
416 err = bind_interdomain_evtchn_to_irq(dev->otherend_id, evtchn);
417 if (err < 0)
418 goto error;
419 fedata->irq = err;
420
421 err = request_threaded_irq(fedata->irq, NULL, pvcalls_back_event,
422 IRQF_ONESHOT, "pvcalls-back", dev);
423 if (err < 0)
424 goto error;
425
426 err = xenbus_map_ring_valloc(dev, &ring_ref, 1,
427 (void **)&fedata->sring);
428 if (err < 0)
429 goto error;
430
431 BACK_RING_INIT(&fedata->ring, fedata->sring, XEN_PAGE_SIZE * 1);
432 fedata->dev = dev;
433
434 INIT_LIST_HEAD(&fedata->socket_mappings);
435 INIT_RADIX_TREE(&fedata->socketpass_mappings, GFP_KERNEL);
436 sema_init(&fedata->socket_lock, 1);
437 dev_set_drvdata(&dev->dev, fedata);
438
439 down(&pvcalls_back_global.frontends_lock);
440 list_add_tail(&fedata->list, &pvcalls_back_global.frontends);
441 up(&pvcalls_back_global.frontends_lock);
442
Stefano Stabellini0a9c75c2017-07-06 10:59:17 -0700443 return 0;
Stefano Stabellinid0e4d562017-07-06 10:59:29 -0700444
445 error:
446 if (fedata->irq >= 0)
447 unbind_from_irqhandler(fedata->irq, dev);
448 if (fedata->sring != NULL)
449 xenbus_unmap_ring_vfree(dev, fedata->sring);
450 kfree(fedata);
451 return err;
Stefano Stabellini0a9c75c2017-07-06 10:59:17 -0700452}
453
/* Tear down per-frontend state; currently a stub returning 0. */
static int backend_disconnect(struct xenbus_device *dev)
{
	return 0;
}
458
Stefano Stabellini72e59c32017-07-05 13:08:39 -0700459static int pvcalls_back_probe(struct xenbus_device *dev,
460 const struct xenbus_device_id *id)
461{
Stefano Stabellini0a9c75c2017-07-06 10:59:17 -0700462 int err, abort;
463 struct xenbus_transaction xbt;
464
465again:
466 abort = 1;
467
468 err = xenbus_transaction_start(&xbt);
469 if (err) {
470 pr_warn("%s cannot create xenstore transaction\n", __func__);
471 return err;
472 }
473
474 err = xenbus_printf(xbt, dev->nodename, "versions", "%s",
475 PVCALLS_VERSIONS);
476 if (err) {
477 pr_warn("%s write out 'versions' failed\n", __func__);
478 goto abort;
479 }
480
481 err = xenbus_printf(xbt, dev->nodename, "max-page-order", "%u",
482 MAX_RING_ORDER);
483 if (err) {
484 pr_warn("%s write out 'max-page-order' failed\n", __func__);
485 goto abort;
486 }
487
488 err = xenbus_printf(xbt, dev->nodename, "function-calls",
489 XENBUS_FUNCTIONS_CALLS);
490 if (err) {
491 pr_warn("%s write out 'function-calls' failed\n", __func__);
492 goto abort;
493 }
494
495 abort = 0;
496abort:
497 err = xenbus_transaction_end(xbt, abort);
498 if (err) {
499 if (err == -EAGAIN && !abort)
500 goto again;
501 pr_warn("%s cannot complete xenstore transaction\n", __func__);
502 return err;
503 }
504
505 if (abort)
506 return -EFAULT;
507
508 xenbus_switch_state(dev, XenbusStateInitWait);
509
Stefano Stabellini72e59c32017-07-05 13:08:39 -0700510 return 0;
511}
512
/*
 * Walk the backend through intermediate xenbus states until it reaches
 * @state.  Each iteration makes one legal transition from the current
 * dev->state; illegal combinations trigger __WARN().  Connecting and
 * disconnecting the frontend happen as side effects of entering
 * Connected and leaving it, respectively.
 */
static void set_backend_state(struct xenbus_device *dev,
			      enum xenbus_state state)
{
	while (dev->state != state) {
		switch (dev->state) {
		case XenbusStateClosed:
			switch (state) {
			case XenbusStateInitWait:
			case XenbusStateConnected:
				xenbus_switch_state(dev, XenbusStateInitWait);
				break;
			case XenbusStateClosing:
				xenbus_switch_state(dev, XenbusStateClosing);
				break;
			default:
				__WARN();
			}
			break;
		case XenbusStateInitWait:
		case XenbusStateInitialised:
			switch (state) {
			case XenbusStateConnected:
				/* map the ring and join the frontend list */
				backend_connect(dev);
				xenbus_switch_state(dev, XenbusStateConnected);
				break;
			case XenbusStateClosing:
			case XenbusStateClosed:
				xenbus_switch_state(dev, XenbusStateClosing);
				break;
			default:
				__WARN();
			}
			break;
		case XenbusStateConnected:
			switch (state) {
			case XenbusStateInitWait:
			case XenbusStateClosing:
			case XenbusStateClosed:
				/* disconnect under the global frontends lock */
				down(&pvcalls_back_global.frontends_lock);
				backend_disconnect(dev);
				up(&pvcalls_back_global.frontends_lock);
				xenbus_switch_state(dev, XenbusStateClosing);
				break;
			default:
				__WARN();
			}
			break;
		case XenbusStateClosing:
			switch (state) {
			case XenbusStateInitWait:
			case XenbusStateConnected:
			case XenbusStateClosed:
				xenbus_switch_state(dev, XenbusStateClosed);
				break;
			default:
				__WARN();
			}
			break;
		default:
			__WARN();
		}
	}
}
576
Stefano Stabellini72e59c32017-07-05 13:08:39 -0700577static void pvcalls_back_changed(struct xenbus_device *dev,
578 enum xenbus_state frontend_state)
579{
Stefano Stabellini0a9c75c2017-07-06 10:59:17 -0700580 switch (frontend_state) {
581 case XenbusStateInitialising:
582 set_backend_state(dev, XenbusStateInitWait);
583 break;
584
585 case XenbusStateInitialised:
586 case XenbusStateConnected:
587 set_backend_state(dev, XenbusStateConnected);
588 break;
589
590 case XenbusStateClosing:
591 set_backend_state(dev, XenbusStateClosing);
592 break;
593
594 case XenbusStateClosed:
595 set_backend_state(dev, XenbusStateClosed);
596 if (xenbus_dev_is_online(dev))
597 break;
598 device_unregister(&dev->dev);
599 break;
600 case XenbusStateUnknown:
601 set_backend_state(dev, XenbusStateClosed);
602 device_unregister(&dev->dev);
603 break;
604
605 default:
606 xenbus_dev_fatal(dev, -EINVAL, "saw state %d at frontend",
607 frontend_state);
608 break;
609 }
Stefano Stabellini72e59c32017-07-05 13:08:39 -0700610}
611
/* Device removal hook; currently a stub returning success. */
static int pvcalls_back_remove(struct xenbus_device *dev)
{
	return 0;
}
616
/* uevent hook; no extra environment variables are added. */
static int pvcalls_back_uevent(struct xenbus_device *xdev,
			       struct kobj_uevent_env *env)
{
	return 0;
}
622
/* xenbus ids: this driver binds to "pvcalls" backend nodes. */
static const struct xenbus_device_id pvcalls_back_ids[] = {
	{ "pvcalls" },
	{ "" }
};

/* xenbus backend driver glue. */
static struct xenbus_driver pvcalls_back_driver = {
	.ids = pvcalls_back_ids,
	.probe = pvcalls_back_probe,
	.remove = pvcalls_back_remove,
	.uevent = pvcalls_back_uevent,
	.otherend_changed = pvcalls_back_changed,
};
Stefano Stabellini9be07332017-07-05 13:08:48 -0700635
636static int __init pvcalls_back_init(void)
637{
638 int ret;
639
640 if (!xen_domain())
641 return -ENODEV;
642
643 ret = xenbus_register_backend(&pvcalls_back_driver);
644 if (ret < 0)
645 return ret;
646
647 sema_init(&pvcalls_back_global.frontends_lock, 1);
648 INIT_LIST_HEAD(&pvcalls_back_global.frontends);
649 return 0;
650}
651module_init(pvcalls_back_init);