blob: 6e70ec3ea796d781455eff2e1030a0dd98b881d4 [file] [log] [blame]
Stefano Stabellini416efba2017-10-30 15:40:51 -07001/*
2 * (c) 2017 Stefano Stabellini <stefano@aporeto.com>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 */
14
15#include <linux/module.h>
Stefano Stabellini21950462017-10-30 15:40:54 -070016#include <linux/net.h>
17#include <linux/socket.h>
18
19#include <net/sock.h>
Stefano Stabellini416efba2017-10-30 15:40:51 -070020
21#include <xen/events.h>
22#include <xen/grant_table.h>
23#include <xen/xen.h>
24#include <xen/xenbus.h>
25#include <xen/interface/io/pvcalls.h>
26
Stefano Stabellini21950462017-10-30 15:40:54 -070027#include "pvcalls-front.h"
28
/* Marks a rsp[] slot as free / not yet filled in by the backend. */
#define PVCALLS_INVALID_ID UINT_MAX
#define PVCALLS_RING_ORDER XENBUS_MAX_RING_GRANT_ORDER
#define PVCALLS_NR_RSP_PER_RING __CONST_RING_SIZE(xen_pvcalls, XEN_PAGE_SIZE)

/*
 * Per-connection frontend state for the shared command ring (one
 * instance; only a single front/back connection is supported).
 */
struct pvcalls_bedata {
	struct xen_pvcalls_front_ring ring;	/* command ring shared with backend */
	grant_ref_t ref;			/* grant reference of the ring page */
	int irq;				/* command-ring event channel irq */

	/* All struct sock_mapping objects created for this connection. */
	struct list_head socket_mappings;
	/*
	 * Taken around socket_mappings updates and command-ring request
	 * submission (see pvcalls_front_socket() and friends).
	 */
	spinlock_t socket_lock;

	/* Woken by the irq handler whenever new responses have been copied. */
	wait_queue_head_t inflight_req;
	/* Per-request-id shadow of responses copied off the shared ring. */
	struct xen_pvcalls_response rsp[PVCALLS_NR_RSP_PER_RING];
};
44/* Only one front/back connection supported. */
45static struct xenbus_device *pvcalls_front_dev;
46static atomic_t pvcalls_refcount;
47
48/* first increment refcount, then proceed */
49#define pvcalls_enter() { \
50 atomic_inc(&pvcalls_refcount); \
51}
52
53/* first complete other operations, then decrement refcount */
54#define pvcalls_exit() { \
55 atomic_dec(&pvcalls_refcount); \
56}
57
/*
 * Per-socket frontend state, linked into bedata->socket_mappings.
 * A mapping is either "active" (a connected data socket with its own
 * data ring and event channel) or "passive" (a bound/listening socket
 * tracked only by status); the roles are mutually exclusive, hence the
 * anonymous union selected by active_socket.
 */
struct sock_mapping {
	bool active_socket;	/* true: use .active, false: use .passive */
	struct list_head list;	/* link in bedata->socket_mappings */
	struct socket *sock;	/* back-pointer to the kernel socket */
	union {
		struct {
			int irq;			/* data-ring event channel irq */
			grant_ref_t ref;		/* grant ref of the indexes page */
			struct pvcalls_data_intf *ring;	/* shared ring indexes page */
			struct pvcalls_data data;	/* in/out data buffers */
			/*
			 * in_mutex/out_mutex: presumably serialize the
			 * recv and send paths respectively -- TODO
			 * confirm against the read/write implementations.
			 */
			struct mutex in_mutex;
			struct mutex out_mutex;

			/* Woken by pvcalls_front_conn_handler(). */
			wait_queue_head_t inflight_conn_req;
		} active;
		struct {
			/* Socket status */
#define PVCALLS_STATUS_UNINITALIZED  0
#define PVCALLS_STATUS_BIND          1
#define PVCALLS_STATUS_LISTEN        2
			uint8_t status;
		} passive;
	};
};
82
Stefano Stabellini21950462017-10-30 15:40:54 -070083static inline int get_request(struct pvcalls_bedata *bedata, int *req_id)
84{
85 *req_id = bedata->ring.req_prod_pvt & (RING_SIZE(&bedata->ring) - 1);
86 if (RING_FULL(&bedata->ring) ||
87 bedata->rsp[*req_id].req_id != PVCALLS_INVALID_ID)
88 return -EAGAIN;
89 return 0;
90}
91
/*
 * Interrupt handler for the command-ring event channel.
 *
 * Drains every unconsumed response from the shared ring into the
 * bedata->rsp[] shadow array and wakes any thread sleeping in
 * wait_event(bedata->inflight_req, ...).  dev_id is the xenbus_device
 * registered in pvcalls_front_probe().
 */
static irqreturn_t pvcalls_front_event_handler(int irq, void *dev_id)
{
	struct xenbus_device *dev = dev_id;
	struct pvcalls_bedata *bedata;
	struct xen_pvcalls_response *rsp;
	uint8_t *src, *dst;
	int req_id = 0, more = 0, done = 0;

	if (dev == NULL)
		return IRQ_HANDLED;

	/* Hold the refcount so pvcalls_front_remove() waits for us. */
	pvcalls_enter();
	bedata = dev_get_drvdata(&dev->dev);
	if (bedata == NULL) {
		pvcalls_exit();
		return IRQ_HANDLED;
	}

again:
	while (RING_HAS_UNCONSUMED_RESPONSES(&bedata->ring)) {
		rsp = RING_GET_RESPONSE(&bedata->ring, bedata->ring.rsp_cons);

		/*
		 * NOTE(review): req_id comes from the backend and
		 * indexes rsp[] without a bounds check.  It should be
		 * the index the frontend put into the request, but a
		 * misbehaving backend could violate that -- verify.
		 */
		req_id = rsp->req_id;
		dst = (uint8_t *)&bedata->rsp[req_id] + sizeof(rsp->req_id);
		src = (uint8_t *)rsp + sizeof(rsp->req_id);
		memcpy(dst, src, sizeof(*rsp) - sizeof(rsp->req_id));
		/*
		 * First copy the rest of the data, then req_id. It is
		 * paired with the barrier when accessing bedata->rsp.
		 */
		smp_wmb();
		bedata->rsp[req_id].req_id = rsp->req_id;

		done = 1;
		bedata->ring.rsp_cons++;
	}

	/* Re-check to close the race with a late producer, then wake waiters. */
	RING_FINAL_CHECK_FOR_RESPONSES(&bedata->ring, more);
	if (more)
		goto again;
	if (done)
		wake_up(&bedata->inflight_req);
	pvcalls_exit();
	return IRQ_HANDLED;
}
137
138static void pvcalls_front_free_map(struct pvcalls_bedata *bedata,
139 struct sock_mapping *map)
140{
141}
142
Stefano Stabellinicb1c7d92017-10-30 15:40:55 -0700143static irqreturn_t pvcalls_front_conn_handler(int irq, void *sock_map)
144{
145 struct sock_mapping *map = sock_map;
146
147 if (map == NULL)
148 return IRQ_HANDLED;
149
150 wake_up_interruptible(&map->active.inflight_conn_req);
151
152 return IRQ_HANDLED;
153}
154
/*
 * Implement socket() for PVCalls: allocate a sock_mapping for @sock,
 * queue a PVCALLS_SOCKET request to the backend and block until the
 * irq handler copies the response back.
 *
 * Returns 0 on success, -EOPNOTSUPP for non-SOCK_STREAM types, -EACCES
 * when no backend connection exists, -ENOMEM or -EAGAIN on local
 * failure, otherwise the backend's return code.
 */
int pvcalls_front_socket(struct socket *sock)
{
	struct pvcalls_bedata *bedata;
	struct sock_mapping *map = NULL;
	struct xen_pvcalls_request *req;
	int notify, req_id, ret;

	/*
	 * PVCalls only supports domain AF_INET,
	 * type SOCK_STREAM and protocol 0 sockets for now.
	 *
	 * Check socket type here, AF_INET and protocol checks are done
	 * by the caller.
	 */
	if (sock->type != SOCK_STREAM)
		return -EOPNOTSUPP;

	pvcalls_enter();
	if (!pvcalls_front_dev) {
		pvcalls_exit();
		return -EACCES;
	}
	bedata = dev_get_drvdata(&pvcalls_front_dev->dev);

	map = kzalloc(sizeof(*map), GFP_KERNEL);
	if (map == NULL) {
		pvcalls_exit();
		return -ENOMEM;
	}

	/* socket_lock covers both the mapping list and ring request slots. */
	spin_lock(&bedata->socket_lock);

	ret = get_request(bedata, &req_id);
	if (ret < 0) {
		kfree(map);
		spin_unlock(&bedata->socket_lock);
		pvcalls_exit();
		return ret;
	}

	/*
	 * sock->sk->sk_send_head is not used for ip sockets: reuse the
	 * field to store a pointer to the struct sock_mapping
	 * corresponding to the socket. This way, we can easily get the
	 * struct sock_mapping from the struct socket.
	 */
	sock->sk->sk_send_head = (void *)map;
	list_add_tail(&map->list, &bedata->socket_mappings);

	req = RING_GET_REQUEST(&bedata->ring, req_id);
	req->req_id = req_id;
	req->cmd = PVCALLS_SOCKET;
	req->u.socket.id = (uintptr_t) map;
	req->u.socket.domain = AF_INET;
	req->u.socket.type = SOCK_STREAM;
	req->u.socket.protocol = IPPROTO_IP;

	bedata->ring.req_prod_pvt++;
	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&bedata->ring, notify);
	spin_unlock(&bedata->socket_lock);
	if (notify)
		notify_remote_via_irq(bedata->irq);

	/* Sleep until the irq handler fills rsp[req_id] (req_id written last). */
	wait_event(bedata->inflight_req,
		   READ_ONCE(bedata->rsp[req_id].req_id) == req_id);

	/* read req_id, then the content */
	smp_rmb();
	ret = bedata->rsp[req_id].ret;
	/* Release the response slot for reuse by get_request(). */
	bedata->rsp[req_id].req_id = PVCALLS_INVALID_ID;

	pvcalls_exit();
	return ret;
}
229
Stefano Stabellinicb1c7d92017-10-30 15:40:55 -0700230static int create_active(struct sock_mapping *map, int *evtchn)
231{
232 void *bytes;
233 int ret = -ENOMEM, irq = -1, i;
234
235 *evtchn = -1;
236 init_waitqueue_head(&map->active.inflight_conn_req);
237
238 map->active.ring = (struct pvcalls_data_intf *)
239 __get_free_page(GFP_KERNEL | __GFP_ZERO);
240 if (map->active.ring == NULL)
241 goto out_error;
242 map->active.ring->ring_order = PVCALLS_RING_ORDER;
243 bytes = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
244 PVCALLS_RING_ORDER);
245 if (bytes == NULL)
246 goto out_error;
247 for (i = 0; i < (1 << PVCALLS_RING_ORDER); i++)
248 map->active.ring->ref[i] = gnttab_grant_foreign_access(
249 pvcalls_front_dev->otherend_id,
250 pfn_to_gfn(virt_to_pfn(bytes) + i), 0);
251
252 map->active.ref = gnttab_grant_foreign_access(
253 pvcalls_front_dev->otherend_id,
254 pfn_to_gfn(virt_to_pfn((void *)map->active.ring)), 0);
255
256 map->active.data.in = bytes;
257 map->active.data.out = bytes +
258 XEN_FLEX_RING_SIZE(PVCALLS_RING_ORDER);
259
260 ret = xenbus_alloc_evtchn(pvcalls_front_dev, evtchn);
261 if (ret)
262 goto out_error;
263 irq = bind_evtchn_to_irqhandler(*evtchn, pvcalls_front_conn_handler,
264 0, "pvcalls-frontend", map);
265 if (irq < 0) {
266 ret = irq;
267 goto out_error;
268 }
269
270 map->active.irq = irq;
271 map->active_socket = true;
272 mutex_init(&map->active.in_mutex);
273 mutex_init(&map->active.out_mutex);
274
275 return 0;
276
277out_error:
278 if (irq >= 0)
279 unbind_from_irqhandler(irq, map);
280 else if (*evtchn >= 0)
281 xenbus_free_evtchn(pvcalls_front_dev, *evtchn);
282 kfree(map->active.data.in);
283 kfree(map->active.ring);
284 return ret;
285}
286
/*
 * Connect a PVCalls socket: set up the per-connection data ring via
 * create_active(), queue a PVCALLS_CONNECT request carrying the grant
 * ref and event channel, and block until the backend responds.
 *
 * Returns 0 on success, -EOPNOTSUPP for non-AF_INET/SOCK_STREAM,
 * -ENOTCONN when no backend connection exists, -ENOTSOCK when
 * pvcalls_front_socket() was never called on this socket, otherwise a
 * ring/allocation error or the backend's return code.
 */
int pvcalls_front_connect(struct socket *sock, struct sockaddr *addr,
			  int addr_len, int flags)
{
	struct pvcalls_bedata *bedata;
	struct sock_mapping *map = NULL;
	struct xen_pvcalls_request *req;
	int notify, req_id, ret, evtchn;

	if (addr->sa_family != AF_INET || sock->type != SOCK_STREAM)
		return -EOPNOTSUPP;

	pvcalls_enter();
	if (!pvcalls_front_dev) {
		pvcalls_exit();
		return -ENOTCONN;
	}

	bedata = dev_get_drvdata(&pvcalls_front_dev->dev);

	/* sk_send_head holds the sock_mapping set by pvcalls_front_socket(). */
	map = (struct sock_mapping *)sock->sk->sk_send_head;
	if (!map) {
		pvcalls_exit();
		return -ENOTSOCK;
	}

	spin_lock(&bedata->socket_lock);
	ret = get_request(bedata, &req_id);
	if (ret < 0) {
		spin_unlock(&bedata->socket_lock);
		pvcalls_exit();
		return ret;
	}
	/*
	 * NOTE(review): create_active() performs GFP_KERNEL (sleeping)
	 * allocations while socket_lock is held -- potential
	 * sleep-in-atomic; consider allocating before taking the lock.
	 */
	ret = create_active(map, &evtchn);
	if (ret < 0) {
		spin_unlock(&bedata->socket_lock);
		pvcalls_exit();
		return ret;
	}

	req = RING_GET_REQUEST(&bedata->ring, req_id);
	req->req_id = req_id;
	req->cmd = PVCALLS_CONNECT;
	req->u.connect.id = (uintptr_t)map;
	req->u.connect.len = addr_len;
	req->u.connect.flags = flags;
	req->u.connect.ref = map->active.ref;
	req->u.connect.evtchn = evtchn;
	memcpy(req->u.connect.addr, addr, sizeof(*addr));

	map->sock = sock;

	bedata->ring.req_prod_pvt++;
	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&bedata->ring, notify);
	spin_unlock(&bedata->socket_lock);

	if (notify)
		notify_remote_via_irq(bedata->irq);

	/* Sleep until the irq handler copies the response into rsp[req_id]. */
	wait_event(bedata->inflight_req,
		   READ_ONCE(bedata->rsp[req_id].req_id) == req_id);

	/* read req_id, then the content */
	smp_rmb();
	ret = bedata->rsp[req_id].ret;
	bedata->rsp[req_id].req_id = PVCALLS_INVALID_ID;
	pvcalls_exit();
	return ret;
}
355
Stefano Stabellini67ea9892017-10-30 15:40:56 -0700356int pvcalls_front_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
357{
358 struct pvcalls_bedata *bedata;
359 struct sock_mapping *map = NULL;
360 struct xen_pvcalls_request *req;
361 int notify, req_id, ret;
362
363 if (addr->sa_family != AF_INET || sock->type != SOCK_STREAM)
364 return -EOPNOTSUPP;
365
366 pvcalls_enter();
367 if (!pvcalls_front_dev) {
368 pvcalls_exit();
369 return -ENOTCONN;
370 }
371 bedata = dev_get_drvdata(&pvcalls_front_dev->dev);
372
373 map = (struct sock_mapping *) sock->sk->sk_send_head;
374 if (map == NULL) {
375 pvcalls_exit();
376 return -ENOTSOCK;
377 }
378
379 spin_lock(&bedata->socket_lock);
380 ret = get_request(bedata, &req_id);
381 if (ret < 0) {
382 spin_unlock(&bedata->socket_lock);
383 pvcalls_exit();
384 return ret;
385 }
386 req = RING_GET_REQUEST(&bedata->ring, req_id);
387 req->req_id = req_id;
388 map->sock = sock;
389 req->cmd = PVCALLS_BIND;
390 req->u.bind.id = (uintptr_t)map;
391 memcpy(req->u.bind.addr, addr, sizeof(*addr));
392 req->u.bind.len = addr_len;
393
394 map->active_socket = false;
395
396 bedata->ring.req_prod_pvt++;
397 RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&bedata->ring, notify);
398 spin_unlock(&bedata->socket_lock);
399 if (notify)
400 notify_remote_via_irq(bedata->irq);
401
402 wait_event(bedata->inflight_req,
403 READ_ONCE(bedata->rsp[req_id].req_id) == req_id);
404
405 /* read req_id, then the content */
406 smp_rmb();
407 ret = bedata->rsp[req_id].ret;
408 bedata->rsp[req_id].req_id = PVCALLS_INVALID_ID;
409
410 map->passive.status = PVCALLS_STATUS_BIND;
411 pvcalls_exit();
412 return 0;
413}
414
/* Xenbus device ids: this driver binds to backends named "pvcalls". */
static const struct xenbus_device_id pvcalls_front_ids[] = {
	{ "pvcalls" },
	{ "" }
};
419
420static int pvcalls_front_remove(struct xenbus_device *dev)
421{
Stefano Stabelliniaa7ba372017-10-30 15:40:52 -0700422 struct pvcalls_bedata *bedata;
423 struct sock_mapping *map = NULL, *n;
424
425 bedata = dev_get_drvdata(&pvcalls_front_dev->dev);
426 dev_set_drvdata(&dev->dev, NULL);
427 pvcalls_front_dev = NULL;
428 if (bedata->irq >= 0)
429 unbind_from_irqhandler(bedata->irq, dev);
430
Stefano Stabellinicb1c7d92017-10-30 15:40:55 -0700431 list_for_each_entry_safe(map, n, &bedata->socket_mappings, list) {
432 map->sock->sk->sk_send_head = NULL;
433 if (map->active_socket) {
434 map->active.ring->in_error = -EBADF;
435 wake_up_interruptible(&map->active.inflight_conn_req);
436 }
437 }
438
Stefano Stabelliniaa7ba372017-10-30 15:40:52 -0700439 smp_mb();
440 while (atomic_read(&pvcalls_refcount) > 0)
441 cpu_relax();
442 list_for_each_entry_safe(map, n, &bedata->socket_mappings, list) {
443 if (map->active_socket) {
444 /* No need to lock, refcount is 0 */
445 pvcalls_front_free_map(bedata, map);
446 } else {
447 list_del(&map->list);
448 kfree(map);
449 }
450 }
451 if (bedata->ref >= 0)
452 gnttab_end_foreign_access(bedata->ref, 0, 0);
453 kfree(bedata->ring.sring);
454 kfree(bedata);
455 xenbus_switch_state(dev, XenbusStateClosed);
Stefano Stabellini416efba2017-10-30 15:40:51 -0700456 return 0;
457}
458
/*
 * Probe callback: validate the backend's advertised features, allocate
 * the command ring, grant it to the backend, bind the event channel,
 * publish ring-ref/port in xenstore, then switch the device to
 * XenbusStateInitialised.
 *
 * Once bedata is installed via dev_set_drvdata(), every later failure
 * unwinds through pvcalls_front_remove().
 */
static int pvcalls_front_probe(struct xenbus_device *dev,
			  const struct xenbus_device_id *id)
{
	int ret = -ENOMEM, evtchn, i;
	unsigned int max_page_order, function_calls, len;
	char *versions;
	grant_ref_t gref_head = 0;
	struct xenbus_transaction xbt;
	struct pvcalls_bedata *bedata = NULL;
	struct xen_pvcalls_sring *sring;

	/* Only a single front/back connection is supported. */
	if (pvcalls_front_dev != NULL) {
		dev_err(&dev->dev, "only one PV Calls connection supported\n");
		return -EINVAL;
	}

	/* The backend must advertise exactly protocol version "1". */
	versions = xenbus_read(XBT_NIL, dev->otherend, "versions", &len);
	/*
	 * NOTE(review): xenbus_read() can return an ERR_PTR; only len
	 * is checked here -- presumably len == 0 in the error case so
	 * the pointer is never dereferenced, verify.
	 */
	if (!len)
		return -EINVAL;
	if (strcmp(versions, "1")) {
		kfree(versions);
		return -EINVAL;
	}
	kfree(versions);
	max_page_order = xenbus_read_unsigned(dev->otherend,
					      "max-page-order", 0);
	if (max_page_order < PVCALLS_RING_ORDER)
		return -ENODEV;
	function_calls = xenbus_read_unsigned(dev->otherend,
					      "function-calls", 0);
	/* See XENBUS_FUNCTIONS_CALLS in pvcalls.h */
	if (function_calls != 1)
		return -ENODEV;
	pr_info("%s max-page-order is %u\n", __func__, max_page_order);

	bedata = kzalloc(sizeof(struct pvcalls_bedata), GFP_KERNEL);
	if (!bedata)
		return -ENOMEM;

	/* From here on, errors unwind through pvcalls_front_remove(). */
	dev_set_drvdata(&dev->dev, bedata);
	pvcalls_front_dev = dev;
	init_waitqueue_head(&bedata->inflight_req);
	INIT_LIST_HEAD(&bedata->socket_mappings);
	spin_lock_init(&bedata->socket_lock);
	bedata->irq = -1;
	bedata->ref = -1;

	/* Mark every response slot free. */
	for (i = 0; i < PVCALLS_NR_RSP_PER_RING; i++)
		bedata->rsp[i].req_id = PVCALLS_INVALID_ID;

	sring = (struct xen_pvcalls_sring *) __get_free_page(GFP_KERNEL |
							     __GFP_ZERO);
	if (!sring)
		goto error;
	SHARED_RING_INIT(sring);
	FRONT_RING_INIT(&bedata->ring, sring, XEN_PAGE_SIZE);

	ret = xenbus_alloc_evtchn(dev, &evtchn);
	if (ret)
		goto error;

	bedata->irq = bind_evtchn_to_irqhandler(evtchn,
						pvcalls_front_event_handler,
						0, "pvcalls-frontend", dev);
	if (bedata->irq < 0) {
		ret = bedata->irq;
		goto error;
	}

	ret = gnttab_alloc_grant_references(1, &gref_head);
	if (ret < 0)
		goto error;
	/*
	 * NOTE(review): bedata->ref is grant_ref_t (unsigned in the Xen
	 * headers), so the < 0 error check below is presumably always
	 * false; consider claiming into an int first -- verify.
	 */
	bedata->ref = gnttab_claim_grant_reference(&gref_head);
	if (bedata->ref < 0) {
		ret = bedata->ref;
		goto error;
	}
	gnttab_grant_foreign_access_ref(bedata->ref, dev->otherend_id,
					virt_to_gfn((void *)sring), 0);

	/* Publish version/ring-ref/port atomically; retry on -EAGAIN. */
 again:
	ret = xenbus_transaction_start(&xbt);
	if (ret) {
		xenbus_dev_fatal(dev, ret, "starting transaction");
		goto error;
	}
	ret = xenbus_printf(xbt, dev->nodename, "version", "%u", 1);
	if (ret)
		goto error_xenbus;
	ret = xenbus_printf(xbt, dev->nodename, "ring-ref", "%d", bedata->ref);
	if (ret)
		goto error_xenbus;
	ret = xenbus_printf(xbt, dev->nodename, "port", "%u",
			    evtchn);
	if (ret)
		goto error_xenbus;
	ret = xenbus_transaction_end(xbt, 0);
	if (ret) {
		if (ret == -EAGAIN)
			goto again;
		xenbus_dev_fatal(dev, ret, "completing transaction");
		goto error;
	}
	xenbus_switch_state(dev, XenbusStateInitialised);

	return 0;

 error_xenbus:
	xenbus_transaction_end(xbt, 1);
	xenbus_dev_fatal(dev, ret, "writing xenstore");
 error:
	pvcalls_front_remove(dev);
	return ret;
}
573
574static void pvcalls_front_changed(struct xenbus_device *dev,
575 enum xenbus_state backend_state)
576{
Stefano Stabellini21968192017-10-30 15:40:53 -0700577 switch (backend_state) {
578 case XenbusStateReconfiguring:
579 case XenbusStateReconfigured:
580 case XenbusStateInitialising:
581 case XenbusStateInitialised:
582 case XenbusStateUnknown:
583 break;
584
585 case XenbusStateInitWait:
586 break;
587
588 case XenbusStateConnected:
589 xenbus_switch_state(dev, XenbusStateConnected);
590 break;
591
592 case XenbusStateClosed:
593 if (dev->state == XenbusStateClosed)
594 break;
595 /* Missed the backend's CLOSING state -- fallthrough */
596 case XenbusStateClosing:
597 xenbus_frontend_closed(dev);
598 break;
599 }
Stefano Stabellini416efba2017-10-30 15:40:51 -0700600}
601
/* Xenbus driver glue: probe/remove plus backend state-change handling. */
static struct xenbus_driver pvcalls_front_driver = {
	.ids = pvcalls_front_ids,
	.probe = pvcalls_front_probe,
	.remove = pvcalls_front_remove,
	.otherend_changed = pvcalls_front_changed,
};
608
609static int __init pvcalls_frontend_init(void)
610{
611 if (!xen_domain())
612 return -ENODEV;
613
614 pr_info("Initialising Xen pvcalls frontend driver\n");
615
616 return xenbus_register_frontend(&pvcalls_front_driver);
617}
618
619module_init(pvcalls_frontend_init);