/*
 * (c) 2017 Stefano Stabellini <stefano@aporeto.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 */
14
Stefano Stabellinifb029872017-07-06 11:01:00 -070015#include <linux/inet.h>
Stefano Stabellini72e59c32017-07-05 13:08:39 -070016#include <linux/kthread.h>
17#include <linux/list.h>
18#include <linux/radix-tree.h>
19#include <linux/module.h>
20#include <linux/semaphore.h>
21#include <linux/wait.h>
Stefano Stabellinifb029872017-07-06 11:01:00 -070022#include <net/sock.h>
23#include <net/inet_common.h>
24#include <net/inet_connection_sock.h>
25#include <net/request_sock.h>
Stefano Stabellini72e59c32017-07-05 13:08:39 -070026
27#include <xen/events.h>
28#include <xen/grant_table.h>
29#include <xen/xen.h>
30#include <xen/xenbus.h>
31#include <xen/interface/io/pvcalls.h>
32
/* Value written to the "versions" xenstore node at probe time. */
#define PVCALLS_VERSIONS "1"
/*
 * Upper bound for the data ring order: written to the "max-page-order"
 * xenstore node and enforced when a frontend's data ring is mapped.
 */
#define MAX_RING_ORDER XENBUS_MAX_RING_GRANT_ORDER
35
Stefano Stabellini9be07332017-07-05 13:08:48 -070036struct pvcalls_back_global {
37 struct list_head frontends;
38 struct semaphore frontends_lock;
39} pvcalls_back_global;
40
Stefano Stabellinid0e4d562017-07-06 10:59:29 -070041/*
42 * Per-frontend data structure. It contains pointers to the command
43 * ring, its event channel, a list of active sockets and a tree of
44 * passive sockets.
45 */
46struct pvcalls_fedata {
47 struct list_head list;
48 struct xenbus_device *dev;
49 struct xen_pvcalls_sring *sring;
50 struct xen_pvcalls_back_ring ring;
51 int irq;
52 struct list_head socket_mappings;
53 struct radix_tree_root socketpass_mappings;
54 struct semaphore socket_lock;
55};
56
Stefano Stabellini5db4d282017-07-06 11:01:06 -070057struct pvcalls_ioworker {
58 struct work_struct register_work;
59 struct workqueue_struct *wq;
60};
61
62struct sock_mapping {
63 struct list_head list;
64 struct pvcalls_fedata *fedata;
Stefano Stabellini6f474e72017-07-06 11:01:07 -070065 struct sockpass_mapping *sockpass;
Stefano Stabellini5db4d282017-07-06 11:01:06 -070066 struct socket *sock;
67 uint64_t id;
68 grant_ref_t ref;
69 struct pvcalls_data_intf *ring;
70 void *bytes;
71 struct pvcalls_data data;
72 uint32_t ring_order;
73 int irq;
74 atomic_t read;
75 atomic_t write;
76 atomic_t io;
77 atomic_t release;
78 void (*saved_data_ready)(struct sock *sk);
79 struct pvcalls_ioworker ioworker;
80};
81
Stefano Stabellini331a63e2017-07-06 11:01:06 -070082struct sockpass_mapping {
83 struct list_head list;
84 struct pvcalls_fedata *fedata;
85 struct socket *sock;
86 uint64_t id;
87 struct xen_pvcalls_request reqcopy;
88 spinlock_t copy_lock;
89 struct workqueue_struct *wq;
90 struct work_struct register_work;
91 void (*saved_data_ready)(struct sock *sk);
92};
93
Stefano Stabellini5db4d282017-07-06 11:01:06 -070094static irqreturn_t pvcalls_back_conn_event(int irq, void *sock_map);
95static int pvcalls_back_release_active(struct xenbus_device *dev,
96 struct pvcalls_fedata *fedata,
97 struct sock_mapping *map);
98
/*
 * Empty stub. Called by pvcalls_back_ioworker() with the sock_mapping as
 * @opaque whenever map->read is positive.
 */
static void pvcalls_conn_back_read(void *opaque)
{
}
102
/*
 * Empty stub. Called by pvcalls_back_ioworker() whenever map->write is
 * positive.
 */
static void pvcalls_conn_back_write(struct sock_mapping *map)
{
}
106
Stefano Stabellini5db4d282017-07-06 11:01:06 -0700107static void pvcalls_back_ioworker(struct work_struct *work)
108{
Stefano Stabellini5d520d82017-07-06 11:01:07 -0700109 struct pvcalls_ioworker *ioworker = container_of(work,
110 struct pvcalls_ioworker, register_work);
111 struct sock_mapping *map = container_of(ioworker, struct sock_mapping,
112 ioworker);
113
114 while (atomic_read(&map->io) > 0) {
115 if (atomic_read(&map->release) > 0) {
116 atomic_set(&map->release, 0);
117 return;
118 }
119
120 if (atomic_read(&map->read) > 0)
121 pvcalls_conn_back_read(map);
122 if (atomic_read(&map->write) > 0)
123 pvcalls_conn_back_write(map);
124
125 atomic_dec(&map->io);
126 }
Stefano Stabellini5db4d282017-07-06 11:01:06 -0700127}
128
Stefano Stabellinib1efa692017-07-06 11:00:00 -0700129static int pvcalls_back_socket(struct xenbus_device *dev,
130 struct xen_pvcalls_request *req)
131{
Stefano Stabellinifb029872017-07-06 11:01:00 -0700132 struct pvcalls_fedata *fedata;
133 int ret;
134 struct xen_pvcalls_response *rsp;
135
136 fedata = dev_get_drvdata(&dev->dev);
137
138 if (req->u.socket.domain != AF_INET ||
139 req->u.socket.type != SOCK_STREAM ||
140 (req->u.socket.protocol != IPPROTO_IP &&
141 req->u.socket.protocol != AF_INET))
142 ret = -EAFNOSUPPORT;
143 else
144 ret = 0;
145
146 /* leave the actual socket allocation for later */
147
148 rsp = RING_GET_RESPONSE(&fedata->ring, fedata->ring.rsp_prod_pvt++);
149 rsp->req_id = req->req_id;
150 rsp->cmd = req->cmd;
151 rsp->u.socket.id = req->u.socket.id;
152 rsp->ret = ret;
153
Stefano Stabellinib1efa692017-07-06 11:00:00 -0700154 return 0;
155}
156
Stefano Stabellini5db4d282017-07-06 11:01:06 -0700157static void pvcalls_sk_state_change(struct sock *sock)
158{
159 struct sock_mapping *map = sock->sk_user_data;
160 struct pvcalls_data_intf *intf;
161
162 if (map == NULL)
163 return;
164
165 intf = map->ring;
166 intf->in_error = -ENOTCONN;
167 notify_remote_via_irq(map->irq);
168}
169
/*
 * Empty stub. Installed as sk_data_ready on active sockets by
 * pvcalls_new_active_socket().
 */
static void pvcalls_sk_data_ready(struct sock *sock)
{
}
173
174static struct sock_mapping *pvcalls_new_active_socket(
175 struct pvcalls_fedata *fedata,
176 uint64_t id,
177 grant_ref_t ref,
178 uint32_t evtchn,
179 struct socket *sock)
180{
181 int ret;
182 struct sock_mapping *map;
183 void *page;
184
185 map = kzalloc(sizeof(*map), GFP_KERNEL);
186 if (map == NULL)
187 return NULL;
188
189 map->fedata = fedata;
190 map->sock = sock;
191 map->id = id;
192 map->ref = ref;
193
194 ret = xenbus_map_ring_valloc(fedata->dev, &ref, 1, &page);
195 if (ret < 0)
196 goto out;
197 map->ring = page;
198 map->ring_order = map->ring->ring_order;
199 /* first read the order, then map the data ring */
200 virt_rmb();
201 if (map->ring_order > MAX_RING_ORDER) {
202 pr_warn("%s frontend requested ring_order %u, which is > MAX (%u)\n",
203 __func__, map->ring_order, MAX_RING_ORDER);
204 goto out;
205 }
206 ret = xenbus_map_ring_valloc(fedata->dev, map->ring->ref,
207 (1 << map->ring_order), &page);
208 if (ret < 0)
209 goto out;
210 map->bytes = page;
211
212 ret = bind_interdomain_evtchn_to_irqhandler(fedata->dev->otherend_id,
213 evtchn,
214 pvcalls_back_conn_event,
215 0,
216 "pvcalls-backend",
217 map);
218 if (ret < 0)
219 goto out;
220 map->irq = ret;
221
222 map->data.in = map->bytes;
223 map->data.out = map->bytes + XEN_FLEX_RING_SIZE(map->ring_order);
224
225 map->ioworker.wq = alloc_workqueue("pvcalls_io", WQ_UNBOUND, 1);
226 if (!map->ioworker.wq)
227 goto out;
228 atomic_set(&map->io, 1);
229 INIT_WORK(&map->ioworker.register_work, pvcalls_back_ioworker);
230
231 down(&fedata->socket_lock);
232 list_add_tail(&map->list, &fedata->socket_mappings);
233 up(&fedata->socket_lock);
234
235 write_lock_bh(&map->sock->sk->sk_callback_lock);
236 map->saved_data_ready = map->sock->sk->sk_data_ready;
237 map->sock->sk->sk_user_data = map;
238 map->sock->sk->sk_data_ready = pvcalls_sk_data_ready;
239 map->sock->sk->sk_state_change = pvcalls_sk_state_change;
240 write_unlock_bh(&map->sock->sk->sk_callback_lock);
241
242 return map;
243out:
244 down(&fedata->socket_lock);
245 list_del(&map->list);
246 pvcalls_back_release_active(fedata->dev, fedata, map);
247 up(&fedata->socket_lock);
248 return NULL;
249}
250
Stefano Stabellinib1efa692017-07-06 11:00:00 -0700251static int pvcalls_back_connect(struct xenbus_device *dev,
252 struct xen_pvcalls_request *req)
253{
Stefano Stabellini5db4d282017-07-06 11:01:06 -0700254 struct pvcalls_fedata *fedata;
255 int ret = -EINVAL;
256 struct socket *sock;
257 struct sock_mapping *map;
258 struct xen_pvcalls_response *rsp;
259 struct sockaddr *sa = (struct sockaddr *)&req->u.connect.addr;
260
261 fedata = dev_get_drvdata(&dev->dev);
262
263 if (req->u.connect.len < sizeof(sa->sa_family) ||
264 req->u.connect.len > sizeof(req->u.connect.addr) ||
265 sa->sa_family != AF_INET)
266 goto out;
267
268 ret = sock_create(AF_INET, SOCK_STREAM, 0, &sock);
269 if (ret < 0)
270 goto out;
271 ret = inet_stream_connect(sock, sa, req->u.connect.len, 0);
272 if (ret < 0) {
273 sock_release(sock);
274 goto out;
275 }
276
277 map = pvcalls_new_active_socket(fedata,
278 req->u.connect.id,
279 req->u.connect.ref,
280 req->u.connect.evtchn,
281 sock);
282 if (!map) {
283 ret = -EFAULT;
284 sock_release(map->sock);
285 }
286
287out:
288 rsp = RING_GET_RESPONSE(&fedata->ring, fedata->ring.rsp_prod_pvt++);
289 rsp->req_id = req->req_id;
290 rsp->cmd = req->cmd;
291 rsp->u.connect.id = req->u.connect.id;
292 rsp->ret = ret;
293
294 return 0;
295}
296
297static int pvcalls_back_release_active(struct xenbus_device *dev,
298 struct pvcalls_fedata *fedata,
299 struct sock_mapping *map)
300{
Stefano Stabellinia51729c2017-07-06 11:01:07 -0700301 disable_irq(map->irq);
302 if (map->sock->sk != NULL) {
303 write_lock_bh(&map->sock->sk->sk_callback_lock);
304 map->sock->sk->sk_user_data = NULL;
305 map->sock->sk->sk_data_ready = map->saved_data_ready;
306 write_unlock_bh(&map->sock->sk->sk_callback_lock);
307 }
308
309 atomic_set(&map->release, 1);
310 flush_work(&map->ioworker.register_work);
311
312 xenbus_unmap_ring_vfree(dev, map->bytes);
313 xenbus_unmap_ring_vfree(dev, (void *)map->ring);
314 unbind_from_irqhandler(map->irq, map);
315
316 sock_release(map->sock);
317 kfree(map);
318
319 return 0;
320}
321
322static int pvcalls_back_release_passive(struct xenbus_device *dev,
323 struct pvcalls_fedata *fedata,
324 struct sockpass_mapping *mappass)
325{
326 if (mappass->sock->sk != NULL) {
327 write_lock_bh(&mappass->sock->sk->sk_callback_lock);
328 mappass->sock->sk->sk_user_data = NULL;
329 mappass->sock->sk->sk_data_ready = mappass->saved_data_ready;
330 write_unlock_bh(&mappass->sock->sk->sk_callback_lock);
331 }
332 sock_release(mappass->sock);
333 flush_workqueue(mappass->wq);
334 destroy_workqueue(mappass->wq);
335 kfree(mappass);
336
Stefano Stabellinib1efa692017-07-06 11:00:00 -0700337 return 0;
338}
339
340static int pvcalls_back_release(struct xenbus_device *dev,
341 struct xen_pvcalls_request *req)
342{
Stefano Stabellinia51729c2017-07-06 11:01:07 -0700343 struct pvcalls_fedata *fedata;
344 struct sock_mapping *map, *n;
345 struct sockpass_mapping *mappass;
346 int ret = 0;
347 struct xen_pvcalls_response *rsp;
348
349 fedata = dev_get_drvdata(&dev->dev);
350
351 down(&fedata->socket_lock);
352 list_for_each_entry_safe(map, n, &fedata->socket_mappings, list) {
353 if (map->id == req->u.release.id) {
354 list_del(&map->list);
355 up(&fedata->socket_lock);
356 ret = pvcalls_back_release_active(dev, fedata, map);
357 goto out;
358 }
359 }
360 mappass = radix_tree_lookup(&fedata->socketpass_mappings,
361 req->u.release.id);
362 if (mappass != NULL) {
363 radix_tree_delete(&fedata->socketpass_mappings, mappass->id);
364 up(&fedata->socket_lock);
365 ret = pvcalls_back_release_passive(dev, fedata, mappass);
366 } else
367 up(&fedata->socket_lock);
368
369out:
370 rsp = RING_GET_RESPONSE(&fedata->ring, fedata->ring.rsp_prod_pvt++);
371 rsp->req_id = req->req_id;
372 rsp->u.release.id = req->u.release.id;
373 rsp->cmd = req->cmd;
374 rsp->ret = ret;
Stefano Stabellinib1efa692017-07-06 11:00:00 -0700375 return 0;
376}
377
Stefano Stabellini331a63e2017-07-06 11:01:06 -0700378static void __pvcalls_back_accept(struct work_struct *work)
379{
Stefano Stabellini6f474e72017-07-06 11:01:07 -0700380 struct sockpass_mapping *mappass = container_of(
381 work, struct sockpass_mapping, register_work);
382 struct sock_mapping *map;
383 struct pvcalls_ioworker *iow;
384 struct pvcalls_fedata *fedata;
385 struct socket *sock;
386 struct xen_pvcalls_response *rsp;
387 struct xen_pvcalls_request *req;
388 int notify;
389 int ret = -EINVAL;
390 unsigned long flags;
391
392 fedata = mappass->fedata;
393 /*
394 * __pvcalls_back_accept can race against pvcalls_back_accept.
395 * We only need to check the value of "cmd" on read. It could be
396 * done atomically, but to simplify the code on the write side, we
397 * use a spinlock.
398 */
399 spin_lock_irqsave(&mappass->copy_lock, flags);
400 req = &mappass->reqcopy;
401 if (req->cmd != PVCALLS_ACCEPT) {
402 spin_unlock_irqrestore(&mappass->copy_lock, flags);
403 return;
404 }
405 spin_unlock_irqrestore(&mappass->copy_lock, flags);
406
407 sock = sock_alloc();
408 if (sock == NULL)
409 goto out_error;
410 sock->type = mappass->sock->type;
411 sock->ops = mappass->sock->ops;
412
413 ret = inet_accept(mappass->sock, sock, O_NONBLOCK, true);
414 if (ret == -EAGAIN) {
415 sock_release(sock);
416 goto out_error;
417 }
418
419 map = pvcalls_new_active_socket(fedata,
420 req->u.accept.id_new,
421 req->u.accept.ref,
422 req->u.accept.evtchn,
423 sock);
424 if (!map) {
425 ret = -EFAULT;
426 sock_release(sock);
427 goto out_error;
428 }
429
430 map->sockpass = mappass;
431 iow = &map->ioworker;
432 atomic_inc(&map->read);
433 atomic_inc(&map->io);
434 queue_work(iow->wq, &iow->register_work);
435
436out_error:
437 rsp = RING_GET_RESPONSE(&fedata->ring, fedata->ring.rsp_prod_pvt++);
438 rsp->req_id = req->req_id;
439 rsp->cmd = req->cmd;
440 rsp->u.accept.id = req->u.accept.id;
441 rsp->ret = ret;
442 RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&fedata->ring, notify);
443 if (notify)
444 notify_remote_via_irq(fedata->irq);
445
446 mappass->reqcopy.cmd = 0;
Stefano Stabellini331a63e2017-07-06 11:01:06 -0700447}
448
449static void pvcalls_pass_sk_data_ready(struct sock *sock)
450{
Stefano Stabellini6f474e72017-07-06 11:01:07 -0700451 struct sockpass_mapping *mappass = sock->sk_user_data;
Stefano Stabellini3cf33a5872017-07-06 11:01:07 -0700452 struct pvcalls_fedata *fedata;
453 struct xen_pvcalls_response *rsp;
454 unsigned long flags;
455 int notify;
Stefano Stabellini6f474e72017-07-06 11:01:07 -0700456
457 if (mappass == NULL)
458 return;
459
Stefano Stabellini3cf33a5872017-07-06 11:01:07 -0700460 fedata = mappass->fedata;
461 spin_lock_irqsave(&mappass->copy_lock, flags);
462 if (mappass->reqcopy.cmd == PVCALLS_POLL) {
463 rsp = RING_GET_RESPONSE(&fedata->ring,
464 fedata->ring.rsp_prod_pvt++);
465 rsp->req_id = mappass->reqcopy.req_id;
466 rsp->u.poll.id = mappass->reqcopy.u.poll.id;
467 rsp->cmd = mappass->reqcopy.cmd;
468 rsp->ret = 0;
469
470 mappass->reqcopy.cmd = 0;
471 spin_unlock_irqrestore(&mappass->copy_lock, flags);
472
473 RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&fedata->ring, notify);
474 if (notify)
475 notify_remote_via_irq(mappass->fedata->irq);
476 } else {
477 spin_unlock_irqrestore(&mappass->copy_lock, flags);
478 queue_work(mappass->wq, &mappass->register_work);
479 }
Stefano Stabellini331a63e2017-07-06 11:01:06 -0700480}
481
Stefano Stabellinib1efa692017-07-06 11:00:00 -0700482static int pvcalls_back_bind(struct xenbus_device *dev,
483 struct xen_pvcalls_request *req)
484{
Stefano Stabellini331a63e2017-07-06 11:01:06 -0700485 struct pvcalls_fedata *fedata;
486 int ret;
487 struct sockpass_mapping *map;
488 struct xen_pvcalls_response *rsp;
489
490 fedata = dev_get_drvdata(&dev->dev);
491
492 map = kzalloc(sizeof(*map), GFP_KERNEL);
493 if (map == NULL) {
494 ret = -ENOMEM;
495 goto out;
496 }
497
498 INIT_WORK(&map->register_work, __pvcalls_back_accept);
499 spin_lock_init(&map->copy_lock);
500 map->wq = alloc_workqueue("pvcalls_wq", WQ_UNBOUND, 1);
501 if (!map->wq) {
502 ret = -ENOMEM;
503 goto out;
504 }
505
506 ret = sock_create(AF_INET, SOCK_STREAM, 0, &map->sock);
507 if (ret < 0)
508 goto out;
509
510 ret = inet_bind(map->sock, (struct sockaddr *)&req->u.bind.addr,
511 req->u.bind.len);
512 if (ret < 0)
513 goto out;
514
515 map->fedata = fedata;
516 map->id = req->u.bind.id;
517
518 down(&fedata->socket_lock);
519 ret = radix_tree_insert(&fedata->socketpass_mappings, map->id,
520 map);
521 up(&fedata->socket_lock);
522 if (ret)
523 goto out;
524
525 write_lock_bh(&map->sock->sk->sk_callback_lock);
526 map->saved_data_ready = map->sock->sk->sk_data_ready;
527 map->sock->sk->sk_user_data = map;
528 map->sock->sk->sk_data_ready = pvcalls_pass_sk_data_ready;
529 write_unlock_bh(&map->sock->sk->sk_callback_lock);
530
531out:
532 if (ret) {
533 if (map && map->sock)
534 sock_release(map->sock);
535 if (map && map->wq)
536 destroy_workqueue(map->wq);
537 kfree(map);
538 }
539 rsp = RING_GET_RESPONSE(&fedata->ring, fedata->ring.rsp_prod_pvt++);
540 rsp->req_id = req->req_id;
541 rsp->cmd = req->cmd;
542 rsp->u.bind.id = req->u.bind.id;
543 rsp->ret = ret;
Stefano Stabellinib1efa692017-07-06 11:00:00 -0700544 return 0;
545}
546
547static int pvcalls_back_listen(struct xenbus_device *dev,
548 struct xen_pvcalls_request *req)
549{
Stefano Stabellini8ce3f762017-07-06 11:01:06 -0700550 struct pvcalls_fedata *fedata;
551 int ret = -EINVAL;
552 struct sockpass_mapping *map;
553 struct xen_pvcalls_response *rsp;
554
555 fedata = dev_get_drvdata(&dev->dev);
556
557 down(&fedata->socket_lock);
558 map = radix_tree_lookup(&fedata->socketpass_mappings, req->u.listen.id);
559 up(&fedata->socket_lock);
560 if (map == NULL)
561 goto out;
562
563 ret = inet_listen(map->sock, req->u.listen.backlog);
564
565out:
566 rsp = RING_GET_RESPONSE(&fedata->ring, fedata->ring.rsp_prod_pvt++);
567 rsp->req_id = req->req_id;
568 rsp->cmd = req->cmd;
569 rsp->u.listen.id = req->u.listen.id;
570 rsp->ret = ret;
Stefano Stabellinib1efa692017-07-06 11:00:00 -0700571 return 0;
572}
573
574static int pvcalls_back_accept(struct xenbus_device *dev,
575 struct xen_pvcalls_request *req)
576{
Stefano Stabellini6f474e72017-07-06 11:01:07 -0700577 struct pvcalls_fedata *fedata;
578 struct sockpass_mapping *mappass;
579 int ret = -EINVAL;
580 struct xen_pvcalls_response *rsp;
581 unsigned long flags;
582
583 fedata = dev_get_drvdata(&dev->dev);
584
585 down(&fedata->socket_lock);
586 mappass = radix_tree_lookup(&fedata->socketpass_mappings,
587 req->u.accept.id);
588 up(&fedata->socket_lock);
589 if (mappass == NULL)
590 goto out_error;
591
592 /*
593 * Limitation of the current implementation: only support one
594 * concurrent accept or poll call on one socket.
595 */
596 spin_lock_irqsave(&mappass->copy_lock, flags);
597 if (mappass->reqcopy.cmd != 0) {
598 spin_unlock_irqrestore(&mappass->copy_lock, flags);
599 ret = -EINTR;
600 goto out_error;
601 }
602
603 mappass->reqcopy = *req;
604 spin_unlock_irqrestore(&mappass->copy_lock, flags);
605 queue_work(mappass->wq, &mappass->register_work);
606
607 /* Tell the caller we don't need to send back a notification yet */
608 return -1;
609
610out_error:
611 rsp = RING_GET_RESPONSE(&fedata->ring, fedata->ring.rsp_prod_pvt++);
612 rsp->req_id = req->req_id;
613 rsp->cmd = req->cmd;
614 rsp->u.accept.id = req->u.accept.id;
615 rsp->ret = ret;
Stefano Stabellinib1efa692017-07-06 11:00:00 -0700616 return 0;
617}
618
619static int pvcalls_back_poll(struct xenbus_device *dev,
620 struct xen_pvcalls_request *req)
621{
Stefano Stabellini3cf33a5872017-07-06 11:01:07 -0700622 struct pvcalls_fedata *fedata;
623 struct sockpass_mapping *mappass;
624 struct xen_pvcalls_response *rsp;
625 struct inet_connection_sock *icsk;
626 struct request_sock_queue *queue;
627 unsigned long flags;
628 int ret;
629 bool data;
630
631 fedata = dev_get_drvdata(&dev->dev);
632
633 down(&fedata->socket_lock);
634 mappass = radix_tree_lookup(&fedata->socketpass_mappings,
635 req->u.poll.id);
636 up(&fedata->socket_lock);
637 if (mappass == NULL)
638 return -EINVAL;
639
640 /*
641 * Limitation of the current implementation: only support one
642 * concurrent accept or poll call on one socket.
643 */
644 spin_lock_irqsave(&mappass->copy_lock, flags);
645 if (mappass->reqcopy.cmd != 0) {
646 ret = -EINTR;
647 goto out;
648 }
649
650 mappass->reqcopy = *req;
651 icsk = inet_csk(mappass->sock->sk);
652 queue = &icsk->icsk_accept_queue;
653 data = queue->rskq_accept_head != NULL;
654 if (data) {
655 mappass->reqcopy.cmd = 0;
656 ret = 0;
657 goto out;
658 }
659 spin_unlock_irqrestore(&mappass->copy_lock, flags);
660
661 /* Tell the caller we don't need to send back a notification yet */
662 return -1;
663
664out:
665 spin_unlock_irqrestore(&mappass->copy_lock, flags);
666
667 rsp = RING_GET_RESPONSE(&fedata->ring, fedata->ring.rsp_prod_pvt++);
668 rsp->req_id = req->req_id;
669 rsp->cmd = req->cmd;
670 rsp->u.poll.id = req->u.poll.id;
671 rsp->ret = ret;
Stefano Stabellinib1efa692017-07-06 11:00:00 -0700672 return 0;
673}
674
675static int pvcalls_back_handle_cmd(struct xenbus_device *dev,
676 struct xen_pvcalls_request *req)
677{
678 int ret = 0;
679
680 switch (req->cmd) {
681 case PVCALLS_SOCKET:
682 ret = pvcalls_back_socket(dev, req);
683 break;
684 case PVCALLS_CONNECT:
685 ret = pvcalls_back_connect(dev, req);
686 break;
687 case PVCALLS_RELEASE:
688 ret = pvcalls_back_release(dev, req);
689 break;
690 case PVCALLS_BIND:
691 ret = pvcalls_back_bind(dev, req);
692 break;
693 case PVCALLS_LISTEN:
694 ret = pvcalls_back_listen(dev, req);
695 break;
696 case PVCALLS_ACCEPT:
697 ret = pvcalls_back_accept(dev, req);
698 break;
699 case PVCALLS_POLL:
700 ret = pvcalls_back_poll(dev, req);
701 break;
702 default:
703 {
704 struct pvcalls_fedata *fedata;
705 struct xen_pvcalls_response *rsp;
706
707 fedata = dev_get_drvdata(&dev->dev);
708 rsp = RING_GET_RESPONSE(
709 &fedata->ring, fedata->ring.rsp_prod_pvt++);
710 rsp->req_id = req->req_id;
711 rsp->cmd = req->cmd;
712 rsp->ret = -ENOTSUPP;
713 break;
714 }
715 }
716 return ret;
717}
718
719static void pvcalls_back_work(struct pvcalls_fedata *fedata)
720{
721 int notify, notify_all = 0, more = 1;
722 struct xen_pvcalls_request req;
723 struct xenbus_device *dev = fedata->dev;
724
725 while (more) {
726 while (RING_HAS_UNCONSUMED_REQUESTS(&fedata->ring)) {
727 RING_COPY_REQUEST(&fedata->ring,
728 fedata->ring.req_cons++,
729 &req);
730
731 if (!pvcalls_back_handle_cmd(dev, &req)) {
732 RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(
733 &fedata->ring, notify);
734 notify_all += notify;
735 }
736 }
737
738 if (notify_all) {
739 notify_remote_via_irq(fedata->irq);
740 notify_all = 0;
741 }
742
743 RING_FINAL_CHECK_FOR_REQUESTS(&fedata->ring, more);
744 }
745}
746
Stefano Stabellinid0e4d562017-07-06 10:59:29 -0700747static irqreturn_t pvcalls_back_event(int irq, void *dev_id)
748{
Stefano Stabellinib1efa692017-07-06 11:00:00 -0700749 struct xenbus_device *dev = dev_id;
750 struct pvcalls_fedata *fedata = NULL;
751
752 if (dev == NULL)
753 return IRQ_HANDLED;
754
755 fedata = dev_get_drvdata(&dev->dev);
756 if (fedata == NULL)
757 return IRQ_HANDLED;
758
759 pvcalls_back_work(fedata);
Stefano Stabellinid0e4d562017-07-06 10:59:29 -0700760 return IRQ_HANDLED;
761}
762
Stefano Stabellini5db4d282017-07-06 11:01:06 -0700763static irqreturn_t pvcalls_back_conn_event(int irq, void *sock_map)
764{
765 return IRQ_HANDLED;
766}
767
Stefano Stabellini0a9c75c2017-07-06 10:59:17 -0700768static int backend_connect(struct xenbus_device *dev)
769{
Stefano Stabellinid0e4d562017-07-06 10:59:29 -0700770 int err, evtchn;
771 grant_ref_t ring_ref;
772 struct pvcalls_fedata *fedata = NULL;
773
774 fedata = kzalloc(sizeof(struct pvcalls_fedata), GFP_KERNEL);
775 if (!fedata)
776 return -ENOMEM;
777
778 fedata->irq = -1;
779 err = xenbus_scanf(XBT_NIL, dev->otherend, "port", "%u",
780 &evtchn);
781 if (err != 1) {
782 err = -EINVAL;
783 xenbus_dev_fatal(dev, err, "reading %s/event-channel",
784 dev->otherend);
785 goto error;
786 }
787
788 err = xenbus_scanf(XBT_NIL, dev->otherend, "ring-ref", "%u", &ring_ref);
789 if (err != 1) {
790 err = -EINVAL;
791 xenbus_dev_fatal(dev, err, "reading %s/ring-ref",
792 dev->otherend);
793 goto error;
794 }
795
796 err = bind_interdomain_evtchn_to_irq(dev->otherend_id, evtchn);
797 if (err < 0)
798 goto error;
799 fedata->irq = err;
800
801 err = request_threaded_irq(fedata->irq, NULL, pvcalls_back_event,
802 IRQF_ONESHOT, "pvcalls-back", dev);
803 if (err < 0)
804 goto error;
805
806 err = xenbus_map_ring_valloc(dev, &ring_ref, 1,
807 (void **)&fedata->sring);
808 if (err < 0)
809 goto error;
810
811 BACK_RING_INIT(&fedata->ring, fedata->sring, XEN_PAGE_SIZE * 1);
812 fedata->dev = dev;
813
814 INIT_LIST_HEAD(&fedata->socket_mappings);
815 INIT_RADIX_TREE(&fedata->socketpass_mappings, GFP_KERNEL);
816 sema_init(&fedata->socket_lock, 1);
817 dev_set_drvdata(&dev->dev, fedata);
818
819 down(&pvcalls_back_global.frontends_lock);
820 list_add_tail(&fedata->list, &pvcalls_back_global.frontends);
821 up(&pvcalls_back_global.frontends_lock);
822
Stefano Stabellini0a9c75c2017-07-06 10:59:17 -0700823 return 0;
Stefano Stabellinid0e4d562017-07-06 10:59:29 -0700824
825 error:
826 if (fedata->irq >= 0)
827 unbind_from_irqhandler(fedata->irq, dev);
828 if (fedata->sring != NULL)
829 xenbus_unmap_ring_vfree(dev, fedata->sring);
830 kfree(fedata);
831 return err;
Stefano Stabellini0a9c75c2017-07-06 10:59:17 -0700832}
833
834static int backend_disconnect(struct xenbus_device *dev)
835{
Stefano Stabellini0a85d232017-07-06 11:01:07 -0700836 struct pvcalls_fedata *fedata;
837 struct sock_mapping *map, *n;
838 struct sockpass_mapping *mappass;
839 struct radix_tree_iter iter;
840 void **slot;
841
842
843 fedata = dev_get_drvdata(&dev->dev);
844
845 down(&fedata->socket_lock);
846 list_for_each_entry_safe(map, n, &fedata->socket_mappings, list) {
847 list_del(&map->list);
848 pvcalls_back_release_active(dev, fedata, map);
849 }
850
851 radix_tree_for_each_slot(slot, &fedata->socketpass_mappings, &iter, 0) {
852 mappass = radix_tree_deref_slot(slot);
853 if (!mappass)
854 continue;
855 if (radix_tree_exception(mappass)) {
856 if (radix_tree_deref_retry(mappass))
857 slot = radix_tree_iter_retry(&iter);
858 } else {
859 radix_tree_delete(&fedata->socketpass_mappings,
860 mappass->id);
861 pvcalls_back_release_passive(dev, fedata, mappass);
862 }
863 }
864 up(&fedata->socket_lock);
865
866 unbind_from_irqhandler(fedata->irq, dev);
867 xenbus_unmap_ring_vfree(dev, fedata->sring);
868
869 list_del(&fedata->list);
870 kfree(fedata);
871 dev_set_drvdata(&dev->dev, NULL);
872
Stefano Stabellini0a9c75c2017-07-06 10:59:17 -0700873 return 0;
874}
875
Stefano Stabellini72e59c32017-07-05 13:08:39 -0700876static int pvcalls_back_probe(struct xenbus_device *dev,
877 const struct xenbus_device_id *id)
878{
Stefano Stabellini0a9c75c2017-07-06 10:59:17 -0700879 int err, abort;
880 struct xenbus_transaction xbt;
881
882again:
883 abort = 1;
884
885 err = xenbus_transaction_start(&xbt);
886 if (err) {
887 pr_warn("%s cannot create xenstore transaction\n", __func__);
888 return err;
889 }
890
891 err = xenbus_printf(xbt, dev->nodename, "versions", "%s",
892 PVCALLS_VERSIONS);
893 if (err) {
894 pr_warn("%s write out 'versions' failed\n", __func__);
895 goto abort;
896 }
897
898 err = xenbus_printf(xbt, dev->nodename, "max-page-order", "%u",
899 MAX_RING_ORDER);
900 if (err) {
901 pr_warn("%s write out 'max-page-order' failed\n", __func__);
902 goto abort;
903 }
904
905 err = xenbus_printf(xbt, dev->nodename, "function-calls",
906 XENBUS_FUNCTIONS_CALLS);
907 if (err) {
908 pr_warn("%s write out 'function-calls' failed\n", __func__);
909 goto abort;
910 }
911
912 abort = 0;
913abort:
914 err = xenbus_transaction_end(xbt, abort);
915 if (err) {
916 if (err == -EAGAIN && !abort)
917 goto again;
918 pr_warn("%s cannot complete xenstore transaction\n", __func__);
919 return err;
920 }
921
922 if (abort)
923 return -EFAULT;
924
925 xenbus_switch_state(dev, XenbusStateInitWait);
926
Stefano Stabellini72e59c32017-07-05 13:08:39 -0700927 return 0;
928}
929
Stefano Stabellini0a9c75c2017-07-06 10:59:17 -0700930static void set_backend_state(struct xenbus_device *dev,
931 enum xenbus_state state)
932{
933 while (dev->state != state) {
934 switch (dev->state) {
935 case XenbusStateClosed:
936 switch (state) {
937 case XenbusStateInitWait:
938 case XenbusStateConnected:
939 xenbus_switch_state(dev, XenbusStateInitWait);
940 break;
941 case XenbusStateClosing:
942 xenbus_switch_state(dev, XenbusStateClosing);
943 break;
944 default:
945 __WARN();
946 }
947 break;
948 case XenbusStateInitWait:
949 case XenbusStateInitialised:
950 switch (state) {
951 case XenbusStateConnected:
952 backend_connect(dev);
953 xenbus_switch_state(dev, XenbusStateConnected);
954 break;
955 case XenbusStateClosing:
956 case XenbusStateClosed:
957 xenbus_switch_state(dev, XenbusStateClosing);
958 break;
959 default:
960 __WARN();
961 }
962 break;
963 case XenbusStateConnected:
964 switch (state) {
965 case XenbusStateInitWait:
966 case XenbusStateClosing:
967 case XenbusStateClosed:
968 down(&pvcalls_back_global.frontends_lock);
969 backend_disconnect(dev);
970 up(&pvcalls_back_global.frontends_lock);
971 xenbus_switch_state(dev, XenbusStateClosing);
972 break;
973 default:
974 __WARN();
975 }
976 break;
977 case XenbusStateClosing:
978 switch (state) {
979 case XenbusStateInitWait:
980 case XenbusStateConnected:
981 case XenbusStateClosed:
982 xenbus_switch_state(dev, XenbusStateClosed);
983 break;
984 default:
985 __WARN();
986 }
987 break;
988 default:
989 __WARN();
990 }
991 }
992}
993
Stefano Stabellini72e59c32017-07-05 13:08:39 -0700994static void pvcalls_back_changed(struct xenbus_device *dev,
995 enum xenbus_state frontend_state)
996{
Stefano Stabellini0a9c75c2017-07-06 10:59:17 -0700997 switch (frontend_state) {
998 case XenbusStateInitialising:
999 set_backend_state(dev, XenbusStateInitWait);
1000 break;
1001
1002 case XenbusStateInitialised:
1003 case XenbusStateConnected:
1004 set_backend_state(dev, XenbusStateConnected);
1005 break;
1006
1007 case XenbusStateClosing:
1008 set_backend_state(dev, XenbusStateClosing);
1009 break;
1010
1011 case XenbusStateClosed:
1012 set_backend_state(dev, XenbusStateClosed);
1013 if (xenbus_dev_is_online(dev))
1014 break;
1015 device_unregister(&dev->dev);
1016 break;
1017 case XenbusStateUnknown:
1018 set_backend_state(dev, XenbusStateClosed);
1019 device_unregister(&dev->dev);
1020 break;
1021
1022 default:
1023 xenbus_dev_fatal(dev, -EINVAL, "saw state %d at frontend",
1024 frontend_state);
1025 break;
1026 }
Stefano Stabellini72e59c32017-07-05 13:08:39 -07001027}
1028
/* remove callback: performs no work and reports success. */
static int pvcalls_back_remove(struct xenbus_device *dev)
{
	return 0;
}
1033
/* uevent callback: adds nothing to @env and reports success. */
static int pvcalls_back_uevent(struct xenbus_device *xdev,
			       struct kobj_uevent_env *env)
{
	return 0;
}
1039
1040static const struct xenbus_device_id pvcalls_back_ids[] = {
1041 { "pvcalls" },
1042 { "" }
1043};
1044
1045static struct xenbus_driver pvcalls_back_driver = {
1046 .ids = pvcalls_back_ids,
1047 .probe = pvcalls_back_probe,
1048 .remove = pvcalls_back_remove,
1049 .uevent = pvcalls_back_uevent,
1050 .otherend_changed = pvcalls_back_changed,
1051};
Stefano Stabellini9be07332017-07-05 13:08:48 -07001052
1053static int __init pvcalls_back_init(void)
1054{
1055 int ret;
1056
1057 if (!xen_domain())
1058 return -ENODEV;
1059
1060 ret = xenbus_register_backend(&pvcalls_back_driver);
1061 if (ret < 0)
1062 return ret;
1063
1064 sema_init(&pvcalls_back_global.frontends_lock, 1);
1065 INIT_LIST_HEAD(&pvcalls_back_global.frontends);
1066 return 0;
1067}
1068module_init(pvcalls_back_init);
Stefano Stabellini0a85d232017-07-06 11:01:07 -07001069
1070static void __exit pvcalls_back_fin(void)
1071{
1072 struct pvcalls_fedata *fedata, *nfedata;
1073
1074 down(&pvcalls_back_global.frontends_lock);
1075 list_for_each_entry_safe(fedata, nfedata,
1076 &pvcalls_back_global.frontends, list) {
1077 backend_disconnect(fedata->dev);
1078 }
1079 up(&pvcalls_back_global.frontends_lock);
1080
1081 xenbus_unregister_driver(&pvcalls_back_driver);
1082}
1083
1084module_exit(pvcalls_back_fin);