kvm: destroy emulated devices on VM exit
[linux-3.10.git] / arch / powerpc / kvm / mpic.c
1 /*
2  * OpenPIC emulation
3  *
4  * Copyright (c) 2004 Jocelyn Mayer
5  *               2011 Alexander Graf
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a copy
8  * of this software and associated documentation files (the "Software"), to deal
9  * in the Software without restriction, including without limitation the rights
10  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11  * copies of the Software, and to permit persons to whom the Software is
12  * furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice shall be included in
15  * all copies or substantial portions of the Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23  * THE SOFTWARE.
24  */
25
26 #include <linux/slab.h>
27 #include <linux/mutex.h>
28 #include <linux/kvm_host.h>
29 #include <linux/errno.h>
30 #include <linux/fs.h>
31 #include <linux/anon_inodes.h>
32 #include <asm/uaccess.h>
33 #include <asm/mpic.h>
34 #include <asm/kvm_para.h>
35 #include <asm/kvm_host.h>
36 #include <asm/kvm_ppc.h>
37 #include "iodev.h"
38
/*
 * Sizing limits of the emulated controller.  These bound the arrays in
 * struct openpic: dst[MAX_CPU], src[MAX_IRQ], timers[MAX_TMR], msi[MAX_MSI].
 */
39 #define MAX_CPU     32
40 #define MAX_SRC     256
41 #define MAX_TMR     4
42 #define MAX_IPI     4
43 #define MAX_MSI     8
/* Total source slots: regular sources plus IPI and timer sources. */
44 #define MAX_IRQ     (MAX_SRC + MAX_IPI + MAX_TMR)
45 #define VID         0x03        /* MPIC version ID */
46
/*
 * OpenPIC capability flags, stored in struct openpic::flags.
 * Each flag is a distinct single bit.
 */
#define OPENPIC_FLAG_IDR_CRIT     (1 << 0)	/* IDR carries critical-interrupt destination bits */
#define OPENPIC_FLAG_ILR          (1 << 1)	/* per-source ILR register is implemented */
50
51 /* OpenPIC address map */
/*
 * *_START values are byte offsets of each register region and *_SIZE
 * values are the region lengths in bytes.
 * NOTE(review): presumably relative to struct openpic::reg_base -- the
 * dispatch code is not visible in this chunk; confirm.
 */
52 #define OPENPIC_REG_SIZE             0x40000
53 #define OPENPIC_GLB_REG_START        0x0
54 #define OPENPIC_GLB_REG_SIZE         0x10F0
55 #define OPENPIC_TMR_REG_START        0x10F0
56 #define OPENPIC_TMR_REG_SIZE         0x220
57 #define OPENPIC_MSI_REG_START        0x1600
58 #define OPENPIC_MSI_REG_SIZE         0x200
59 #define OPENPIC_SUMMARY_REG_START    0x3800
60 #define OPENPIC_SUMMARY_REG_SIZE     0x800
61 #define OPENPIC_SRC_REG_START        0x10000
/* 0x20 bytes of register space per interrupt source */
62 #define OPENPIC_SRC_REG_SIZE         (MAX_SRC * 0x20)
63 #define OPENPIC_CPU_REG_START        0x20000
/* 0x1000-byte stride per CPU; the last CPU contributes only 0x100 bytes */
64 #define OPENPIC_CPU_REG_SIZE         (0x100 + ((MAX_CPU - 1) * 0x1000))
65
/*
 * Per-model parameters for the Freescale MPIC variants.  An instance is
 * referenced through struct openpic::fsl.
 */
66 struct fsl_mpic_info {
        /* NOTE(review): presumably the number of external interrupt
         * sources -- its consumer is not visible in this chunk; confirm.
         */
67         int max_ext;
68 };
69
/* Parameter set named for MPIC v2.0 */
70 static struct fsl_mpic_info fsl_mpic_20 = {
71         .max_ext = 12,
72 };
73
/* Parameter set named for MPIC v4.2; currently identical to fsl_mpic_20 */
74 static struct fsl_mpic_info fsl_mpic_42 = {
75         .max_ext = 12,
76 };
77
/* Feature reporting register (FRR) field positions */
#define FRR_NIRQ_SHIFT    16
#define FRR_NCPU_SHIFT     8
#define FRR_VID_SHIFT      0

#define VID_REVISION_1_2   2
#define VID_REVISION_1_3   3

#define VIR_GENERIC      0x00000000     /* Generic Vendor ID */

/* Global configuration register (GCR) bits */
#define GCR_RESET        0x80000000
#define GCR_MODE_PASS    0x00000000
#define GCR_MODE_MIXED   0x20000000
#define GCR_MODE_PROXY   0x60000000

#define TBCR_CI           0x80000000    /* count inhibit */
#define TCCR_TOG          0x80000000    /* toggles when decrement to zero */

/* IRQ destination register (IDR) bit positions */
#define IDR_EP_SHIFT      31
/* Unsigned on purpose: shifting a signed 1 into bit 31 overflows int. */
#define IDR_EP_MASK       (1U << IDR_EP_SHIFT)
#define IDR_CI0_SHIFT     30
#define IDR_CI1_SHIFT     29
#define IDR_P1_SHIFT      1
#define IDR_P0_SHIFT      0

/* Interrupt level register (ILR) target values; also used as the index
 * into irq_dest::outputs_active[], hence NUM_OUTPUTS.
 */
#define ILR_INTTGT_MASK   0x000000ff
#define ILR_INTTGT_INT    0x00
#define ILR_INTTGT_CINT   0x01  /* critical */
#define ILR_INTTGT_MCP    0x02  /* machine check */
#define NUM_OUTPUTS       3

#define MSIIR_OFFSET       0x140
#define MSIIR_SRS_SHIFT    29
/* Unsigned on purpose: 0x7 << 29 does not fit in a signed int. */
#define MSIIR_SRS_MASK     (0x7U << MSIIR_SRS_SHIFT)
#define MSIIR_IBS_SHIFT    24
#define MSIIR_IBS_MASK     (0x1f << MSIIR_IBS_SHIFT)
111 #define MSIIR_IBS_SHIFT    24
112 #define MSIIR_IBS_MASK     (0x1f << MSIIR_IBS_SHIFT)
113
/*
 * Return the interrupt-controller CPU index (arch.irq_cpu_id) of the vCPU
 * bound to the current thread, or -1 when no vCPU is bound or when the
 * kernel is not built with both CONFIG_KVM and CONFIG_BOOKE.
 */
static int get_current_cpu(void)
{
	int cpu = -1;

#if defined(CONFIG_KVM) && defined(CONFIG_BOOKE)
	struct kvm_vcpu *vcpu = current->thread.kvm_vcpu;

	if (vcpu)
		cpu = vcpu->arch.irq_cpu_id;
#endif
	/* XXX: other configurations have no way to identify the caller */
	return cpu;
}
124
/*
 * Forward declarations of the per-CPU register accessors.  The global
 * register handlers below forward offsets 0x40-0xB0 to them, passing the
 * index of the calling vCPU (from get_current_cpu()) as idx.
 */
125 static int openpic_cpu_write_internal(void *opaque, gpa_t addr,
126                                       u32 val, int idx);
127 static int openpic_cpu_read_internal(void *opaque, gpa_t addr,
128                                      u32 *ptr, int idx);
129
/*
 * Kind of interrupt source.  It decides which IVPR bits a source keeps on
 * write (see write_IRQreg_ivpr) and how it is initialised on reset.
 */
130 enum irq_type {
131         IRQ_TYPE_NORMAL = 0,
132         IRQ_TYPE_FSLINT,        /* FSL internal interrupt -- level only */
133         IRQ_TYPE_FSLSPECIAL,    /* FSL timer/IPI interrupt, edge, no polarity */
134 };
135
/*
 * Set of interrupt numbers kept as a bitmap, plus a cached "best" entry.
 * next/priority are recomputed by IRQ_check(), not on every bit change.
 */
136 struct irq_queue {
137         /* Round up to the nearest 64 IRQs so that the queue length
138          * won't change when moving between 32 and 64 bit hosts.
139          */
140         unsigned long queue[BITS_TO_LONGS((MAX_IRQ + 63) & ~63)];
        /* highest-priority IRQ found by the last IRQ_check(), or -1 */
141         int next;
        /* priority of that IRQ, or -1 when the queue was empty */
142         int priority;
143 };
144
/* State of one interrupt source (one entry of struct openpic::src[]). */
145 struct irq_source {
146         uint32_t ivpr;          /* IRQ vector/priority register */
147         uint32_t idr;           /* IRQ destination register */
148         uint32_t destmask;      /* bitmap of CPU destinations */
        /* CPU that last received this IRQ; start point of the scan in
         * distributed delivery mode (see openpic_update_irq)
         */
149         int last_cpu;
        /* also used as the index into irq_dest::outputs_active[] */
150         int output;             /* IRQ level, e.g. ILR_INTTGT_INT */
151         int pending;            /* TRUE if IRQ is pending */
152         enum irq_type type;
153         bool level:1;           /* level-triggered */
154         bool nomask:1;  /* critical interrupts ignore mask on some FSL MPICs */
155 };
156
/* IRQ vector/priority register (IVPR) fields */
#define IVPR_MASK_SHIFT       31
/* Unsigned on purpose: shifting a signed 1 into bit 31 overflows int. */
#define IVPR_MASK_MASK        (1U << IVPR_MASK_SHIFT)
#define IVPR_ACTIVITY_SHIFT   30
#define IVPR_ACTIVITY_MASK    (1 << IVPR_ACTIVITY_SHIFT)
#define IVPR_MODE_SHIFT       29
#define IVPR_MODE_MASK        (1 << IVPR_MODE_SHIFT)
#define IVPR_POLARITY_SHIFT   23
#define IVPR_POLARITY_MASK    (1 << IVPR_POLARITY_SHIFT)
#define IVPR_SENSE_SHIFT      22
#define IVPR_SENSE_MASK       (1 << IVPR_SENSE_SHIFT)

/* Priority occupies bits 19:16; the vector width is model dependent and
 * taken from struct openpic::vector_mask.
 */
#define IVPR_PRIORITY_MASK     (0xF << 16)
#define IVPR_PRIORITY(_ivprr_) ((int)(((_ivprr_) & IVPR_PRIORITY_MASK) >> 16))
#define IVPR_VECTOR(opp, _ivprr_) ((_ivprr_) & (opp)->vector_mask)

/* IDR[EP/CI] are only for FSL MPIC prior to v4.0 */
#define IDR_EP      0x80000000  /* external pin */
#define IDR_CI      0x40000000  /* critical interrupt */
175
/* Per-CPU delivery state (one entry of struct openpic::dst[]). */
176 struct irq_dest {
        /* target vCPU; NULL means the destination does not exist
         * (mpic_irq_raise/mpic_irq_lower then do nothing)
         */
177         struct kvm_vcpu *vcpu;
178
179         int32_t ctpr;           /* CPU current task priority */
        /* IRQs currently asserted towards this CPU */
180         struct irq_queue raised;
        /* NOTE(review): presumably IRQs acknowledged but not yet EOI'd --
         * the code that fills this queue is not visible here; confirm.
         */
181         struct irq_queue servicing;
182
183         /* Count of IRQ sources asserting on non-INT outputs */
184         uint32_t outputs_active[NUM_OUTPUTS];
185 };
186
/* Complete state of one emulated OpenPIC/MPIC interrupt controller. */
187 struct openpic {
188         struct kvm *kvm;
189         struct kvm_device *dev;
190         struct kvm_io_device mmio;
191         struct list_head mmio_regions;
192         atomic_t users;
193         bool mmio_mapped;
194
195         gpa_t reg_base;
196         spinlock_t lock;
197
198         /* Behavior control */
199         struct fsl_mpic_info *fsl;
200         uint32_t model;
201         uint32_t flags;         /* OPENPIC_FLAG_* capability bits */
202         uint32_t nb_irqs;       /* reported (minus one) in FRR on reset */
203         uint32_t vid;           /* version ID placed in FRR on reset */
204         uint32_t vir;           /* Vendor identification register */
205         uint32_t vector_mask;   /* writable vector bits of IVPR/SPVE */
206         uint32_t tfrr_reset;    /* value loaded into tfrr on reset */
207         uint32_t ivpr_reset;    /* value loaded into every src ivpr on reset */
208         uint32_t idr_reset;     /* value loaded into every src idr on reset */
209         uint32_t brr1;          /* returned for Block Revision Register 1 reads */
210         uint32_t mpic_mode_mask; /* GCR bits the guest may change */
211
212         /* Global registers */
213         uint32_t frr;           /* Feature reporting register */
214         uint32_t gcr;           /* Global configuration register  */
215         uint32_t pir;           /* Processor initialization register */
216         uint32_t spve;          /* Spurious vector register */
217         uint32_t tfrr;          /* Timer frequency reporting register */
218         /* Source registers */
219         struct irq_source src[MAX_IRQ];
220         /* Local registers per output pin */
221         struct irq_dest dst[MAX_CPU];
        /* number of CPUs in use; bounds destination masks and FRR[NCPU] */
222         uint32_t nb_cpus;
223         /* Timer registers */
224         struct {
225                 uint32_t tccr;  /* Global timer current count register */
226                 uint32_t tbcr;  /* Global timer base count register */
227         } timers[MAX_TMR];
228         /* Shared MSI registers */
229         struct {
230                 uint32_t msir;  /* Shared Message Signaled Interrupt Register */
231         } msi[MAX_MSI];
        /* number of src[] entries in use (reset loop and IRQ_check bound) */
232         uint32_t max_irq;
        /* index in src[] of the first IPI source */
233         uint32_t irq_ipi0;
        /* index in src[] of the first timer source */
234         uint32_t irq_tim0;
        /* NOTE(review): presumably index of the first MSI source -- not
         * used in the visible part of the file; confirm.
         */
235         uint32_t irq_msi;
236 };
237
238
239 static void mpic_irq_raise(struct openpic *opp, struct irq_dest *dst,
240                            int output)
241 {
242         struct kvm_interrupt irq = {
243                 .irq = KVM_INTERRUPT_SET_LEVEL,
244         };
245
246         if (!dst->vcpu) {
247                 pr_debug("%s: destination cpu %d does not exist\n",
248                          __func__, (int)(dst - &opp->dst[0]));
249                 return;
250         }
251
252         pr_debug("%s: cpu %d output %d\n", __func__, dst->vcpu->arch.irq_cpu_id,
253                 output);
254
255         if (output != ILR_INTTGT_INT)   /* TODO */
256                 return;
257
258         kvm_vcpu_ioctl_interrupt(dst->vcpu, &irq);
259 }
260
261 static void mpic_irq_lower(struct openpic *opp, struct irq_dest *dst,
262                            int output)
263 {
264         if (!dst->vcpu) {
265                 pr_debug("%s: destination cpu %d does not exist\n",
266                          __func__, (int)(dst - &opp->dst[0]));
267                 return;
268         }
269
270         pr_debug("%s: cpu %d output %d\n", __func__, dst->vcpu->arch.irq_cpu_id,
271                 output);
272
273         if (output != ILR_INTTGT_INT)   /* TODO */
274                 return;
275
276         kvmppc_core_dequeue_external(dst->vcpu);
277 }
278
279 static inline void IRQ_setbit(struct irq_queue *q, int n_IRQ)
280 {
281         set_bit(n_IRQ, q->queue);
282 }
283
284 static inline void IRQ_resetbit(struct irq_queue *q, int n_IRQ)
285 {
286         clear_bit(n_IRQ, q->queue);
287 }
288
289 static inline int IRQ_testbit(struct irq_queue *q, int n_IRQ)
290 {
291         return test_bit(n_IRQ, q->queue);
292 }
293
294 static void IRQ_check(struct openpic *opp, struct irq_queue *q)
295 {
296         int irq = -1;
297         int next = -1;
298         int priority = -1;
299
300         for (;;) {
301                 irq = find_next_bit(q->queue, opp->max_irq, irq + 1);
302                 if (irq == opp->max_irq)
303                         break;
304
305                 pr_debug("IRQ_check: irq %d set ivpr_pr=%d pr=%d\n",
306                         irq, IVPR_PRIORITY(opp->src[irq].ivpr), priority);
307
308                 if (IVPR_PRIORITY(opp->src[irq].ivpr) > priority) {
309                         next = irq;
310                         priority = IVPR_PRIORITY(opp->src[irq].ivpr);
311                 }
312         }
313
314         q->next = next;
315         q->priority = priority;
316 }
317
318 static int IRQ_get_next(struct openpic *opp, struct irq_queue *q)
319 {
320         /* XXX: optimize */
321         IRQ_check(opp, q);
322
323         return q->next;
324 }
325
326 static void IRQ_local_pipe(struct openpic *opp, int n_CPU, int n_IRQ,
327                            bool active, bool was_active)
328 {
329         struct irq_dest *dst;
330         struct irq_source *src;
331         int priority;
332
333         dst = &opp->dst[n_CPU];
334         src = &opp->src[n_IRQ];
335
336         pr_debug("%s: IRQ %d active %d was %d\n",
337                 __func__, n_IRQ, active, was_active);
338
339         if (src->output != ILR_INTTGT_INT) {
340                 pr_debug("%s: output %d irq %d active %d was %d count %d\n",
341                         __func__, src->output, n_IRQ, active, was_active,
342                         dst->outputs_active[src->output]);
343
344                 /* On Freescale MPIC, critical interrupts ignore priority,
345                  * IACK, EOI, etc.  Before MPIC v4.1 they also ignore
346                  * masking.
347                  */
348                 if (active) {
349                         if (!was_active &&
350                             dst->outputs_active[src->output]++ == 0) {
351                                 pr_debug("%s: Raise OpenPIC output %d cpu %d irq %d\n",
352                                         __func__, src->output, n_CPU, n_IRQ);
353                                 mpic_irq_raise(opp, dst, src->output);
354                         }
355                 } else {
356                         if (was_active &&
357                             --dst->outputs_active[src->output] == 0) {
358                                 pr_debug("%s: Lower OpenPIC output %d cpu %d irq %d\n",
359                                         __func__, src->output, n_CPU, n_IRQ);
360                                 mpic_irq_lower(opp, dst, src->output);
361                         }
362                 }
363
364                 return;
365         }
366
367         priority = IVPR_PRIORITY(src->ivpr);
368
369         /* Even if the interrupt doesn't have enough priority,
370          * it is still raised, in case ctpr is lowered later.
371          */
372         if (active)
373                 IRQ_setbit(&dst->raised, n_IRQ);
374         else
375                 IRQ_resetbit(&dst->raised, n_IRQ);
376
377         IRQ_check(opp, &dst->raised);
378
379         if (active && priority <= dst->ctpr) {
380                 pr_debug("%s: IRQ %d priority %d too low for ctpr %d on CPU %d\n",
381                         __func__, n_IRQ, priority, dst->ctpr, n_CPU);
382                 active = 0;
383         }
384
385         if (active) {
386                 if (IRQ_get_next(opp, &dst->servicing) >= 0 &&
387                     priority <= dst->servicing.priority) {
388                         pr_debug("%s: IRQ %d is hidden by servicing IRQ %d on CPU %d\n",
389                                 __func__, n_IRQ, dst->servicing.next, n_CPU);
390                 } else {
391                         pr_debug("%s: Raise OpenPIC INT output cpu %d irq %d/%d\n",
392                                 __func__, n_CPU, n_IRQ, dst->raised.next);
393                         mpic_irq_raise(opp, dst, ILR_INTTGT_INT);
394                 }
395         } else {
396                 IRQ_get_next(opp, &dst->servicing);
397                 if (dst->raised.priority > dst->ctpr &&
398                     dst->raised.priority > dst->servicing.priority) {
399                         pr_debug("%s: IRQ %d inactive, IRQ %d prio %d above %d/%d, CPU %d\n",
400                                 __func__, n_IRQ, dst->raised.next,
401                                 dst->raised.priority, dst->ctpr,
402                                 dst->servicing.priority, n_CPU);
403                         /* IRQ line stays asserted */
404                 } else {
405                         pr_debug("%s: IRQ %d inactive, current prio %d/%d, CPU %d\n",
406                                 __func__, n_IRQ, dst->ctpr,
407                                 dst->servicing.priority, n_CPU);
408                         mpic_irq_lower(opp, dst, ILR_INTTGT_INT);
409                 }
410         }
411 }
412
413 /* update pic state because registers for n_IRQ have changed value */
414 static void openpic_update_irq(struct openpic *opp, int n_IRQ)
415 {
416         struct irq_source *src;
417         bool active, was_active;
418         int i;
419
420         src = &opp->src[n_IRQ];
421         active = src->pending;
422
423         if ((src->ivpr & IVPR_MASK_MASK) && !src->nomask) {
424                 /* Interrupt source is disabled */
425                 pr_debug("%s: IRQ %d is disabled\n", __func__, n_IRQ);
426                 active = false;
427         }
428
429         was_active = !!(src->ivpr & IVPR_ACTIVITY_MASK);
430
431         /*
432          * We don't have a similar check for already-active because
433          * ctpr may have changed and we need to withdraw the interrupt.
434          */
435         if (!active && !was_active) {
436                 pr_debug("%s: IRQ %d is already inactive\n", __func__, n_IRQ);
437                 return;
438         }
439
440         if (active)
441                 src->ivpr |= IVPR_ACTIVITY_MASK;
442         else
443                 src->ivpr &= ~IVPR_ACTIVITY_MASK;
444
445         if (src->destmask == 0) {
446                 /* No target */
447                 pr_debug("%s: IRQ %d has no target\n", __func__, n_IRQ);
448                 return;
449         }
450
451         if (src->destmask == (1 << src->last_cpu)) {
452                 /* Only one CPU is allowed to receive this IRQ */
453                 IRQ_local_pipe(opp, src->last_cpu, n_IRQ, active, was_active);
454         } else if (!(src->ivpr & IVPR_MODE_MASK)) {
455                 /* Directed delivery mode */
456                 for (i = 0; i < opp->nb_cpus; i++) {
457                         if (src->destmask & (1 << i)) {
458                                 IRQ_local_pipe(opp, i, n_IRQ, active,
459                                                was_active);
460                         }
461                 }
462         } else {
463                 /* Distributed delivery mode */
464                 for (i = src->last_cpu + 1; i != src->last_cpu; i++) {
465                         if (i == opp->nb_cpus)
466                                 i = 0;
467
468                         if (src->destmask & (1 << i)) {
469                                 IRQ_local_pipe(opp, i, n_IRQ, active,
470                                                was_active);
471                                 src->last_cpu = i;
472                                 break;
473                         }
474                 }
475         }
476 }
477
478 static void openpic_set_irq(void *opaque, int n_IRQ, int level)
479 {
480         struct openpic *opp = opaque;
481         struct irq_source *src;
482
483         if (n_IRQ >= MAX_IRQ) {
484                 WARN_ONCE(1, "%s: IRQ %d out of range\n", __func__, n_IRQ);
485                 return;
486         }
487
488         src = &opp->src[n_IRQ];
489         pr_debug("openpic: set irq %d = %d ivpr=0x%08x\n",
490                 n_IRQ, level, src->ivpr);
491         if (src->level) {
492                 /* level-sensitive irq */
493                 src->pending = level;
494                 openpic_update_irq(opp, n_IRQ);
495         } else {
496                 /* edge-sensitive irq */
497                 if (level) {
498                         src->pending = 1;
499                         openpic_update_irq(opp, n_IRQ);
500                 }
501
502                 if (src->output != ILR_INTTGT_INT) {
503                         /* Edge-triggered interrupts shouldn't be used
504                          * with non-INT delivery, but just in case,
505                          * try to make it do something sane rather than
506                          * cause an interrupt storm.  This is close to
507                          * what you'd probably see happen in real hardware.
508                          */
509                         src->pending = 0;
510                         openpic_update_irq(opp, n_IRQ);
511                 }
512         }
513 }
514
515 static void openpic_reset(struct openpic *opp)
516 {
517         int i;
518
519         opp->gcr = GCR_RESET;
520         /* Initialise controller registers */
521         opp->frr = ((opp->nb_irqs - 1) << FRR_NIRQ_SHIFT) |
522             (opp->vid << FRR_VID_SHIFT);
523
524         opp->pir = 0;
525         opp->spve = -1 & opp->vector_mask;
526         opp->tfrr = opp->tfrr_reset;
527         /* Initialise IRQ sources */
528         for (i = 0; i < opp->max_irq; i++) {
529                 opp->src[i].ivpr = opp->ivpr_reset;
530                 opp->src[i].idr = opp->idr_reset;
531
532                 switch (opp->src[i].type) {
533                 case IRQ_TYPE_NORMAL:
534                         opp->src[i].level =
535                             !!(opp->ivpr_reset & IVPR_SENSE_MASK);
536                         break;
537
538                 case IRQ_TYPE_FSLINT:
539                         opp->src[i].ivpr |= IVPR_POLARITY_MASK;
540                         break;
541
542                 case IRQ_TYPE_FSLSPECIAL:
543                         break;
544                 }
545         }
546         /* Initialise IRQ destinations */
547         for (i = 0; i < MAX_CPU; i++) {
548                 opp->dst[i].ctpr = 15;
549                 memset(&opp->dst[i].raised, 0, sizeof(struct irq_queue));
550                 opp->dst[i].raised.next = -1;
551                 memset(&opp->dst[i].servicing, 0, sizeof(struct irq_queue));
552                 opp->dst[i].servicing.next = -1;
553         }
554         /* Initialise timers */
555         for (i = 0; i < MAX_TMR; i++) {
556                 opp->timers[i].tccr = 0;
557                 opp->timers[i].tbcr = TBCR_CI;
558         }
559         /* Go out of RESET state */
560         opp->gcr = 0;
561 }
562
563 static inline uint32_t read_IRQreg_idr(struct openpic *opp, int n_IRQ)
564 {
565         return opp->src[n_IRQ].idr;
566 }
567
568 static inline uint32_t read_IRQreg_ilr(struct openpic *opp, int n_IRQ)
569 {
570         if (opp->flags & OPENPIC_FLAG_ILR)
571                 return opp->src[n_IRQ].output;
572
573         return 0xffffffff;
574 }
575
576 static inline uint32_t read_IRQreg_ivpr(struct openpic *opp, int n_IRQ)
577 {
578         return opp->src[n_IRQ].ivpr;
579 }
580
581 static inline void write_IRQreg_idr(struct openpic *opp, int n_IRQ,
582                                     uint32_t val)
583 {
584         struct irq_source *src = &opp->src[n_IRQ];
585         uint32_t normal_mask = (1UL << opp->nb_cpus) - 1;
586         uint32_t crit_mask = 0;
587         uint32_t mask = normal_mask;
588         int crit_shift = IDR_EP_SHIFT - opp->nb_cpus;
589         int i;
590
591         if (opp->flags & OPENPIC_FLAG_IDR_CRIT) {
592                 crit_mask = mask << crit_shift;
593                 mask |= crit_mask | IDR_EP;
594         }
595
596         src->idr = val & mask;
597         pr_debug("Set IDR %d to 0x%08x\n", n_IRQ, src->idr);
598
599         if (opp->flags & OPENPIC_FLAG_IDR_CRIT) {
600                 if (src->idr & crit_mask) {
601                         if (src->idr & normal_mask) {
602                                 pr_debug("%s: IRQ configured for multiple output types, using critical\n",
603                                         __func__);
604                         }
605
606                         src->output = ILR_INTTGT_CINT;
607                         src->nomask = true;
608                         src->destmask = 0;
609
610                         for (i = 0; i < opp->nb_cpus; i++) {
611                                 int n_ci = IDR_CI0_SHIFT - i;
612
613                                 if (src->idr & (1UL << n_ci))
614                                         src->destmask |= 1UL << i;
615                         }
616                 } else {
617                         src->output = ILR_INTTGT_INT;
618                         src->nomask = false;
619                         src->destmask = src->idr & normal_mask;
620                 }
621         } else {
622                 src->destmask = src->idr;
623         }
624 }
625
626 static inline void write_IRQreg_ilr(struct openpic *opp, int n_IRQ,
627                                     uint32_t val)
628 {
629         if (opp->flags & OPENPIC_FLAG_ILR) {
630                 struct irq_source *src = &opp->src[n_IRQ];
631
632                 src->output = val & ILR_INTTGT_MASK;
633                 pr_debug("Set ILR %d to 0x%08x, output %d\n", n_IRQ, src->idr,
634                         src->output);
635
636                 /* TODO: on MPIC v4.0 only, set nomask for non-INT */
637         }
638 }
639
640 static inline void write_IRQreg_ivpr(struct openpic *opp, int n_IRQ,
641                                      uint32_t val)
642 {
643         uint32_t mask;
644
645         /* NOTE when implementing newer FSL MPIC models: starting with v4.0,
646          * the polarity bit is read-only on internal interrupts.
647          */
648         mask = IVPR_MASK_MASK | IVPR_PRIORITY_MASK | IVPR_SENSE_MASK |
649             IVPR_POLARITY_MASK | opp->vector_mask;
650
651         /* ACTIVITY bit is read-only */
652         opp->src[n_IRQ].ivpr =
653             (opp->src[n_IRQ].ivpr & IVPR_ACTIVITY_MASK) | (val & mask);
654
655         /* For FSL internal interrupts, The sense bit is reserved and zero,
656          * and the interrupt is always level-triggered.  Timers and IPIs
657          * have no sense or polarity bits, and are edge-triggered.
658          */
659         switch (opp->src[n_IRQ].type) {
660         case IRQ_TYPE_NORMAL:
661                 opp->src[n_IRQ].level =
662                     !!(opp->src[n_IRQ].ivpr & IVPR_SENSE_MASK);
663                 break;
664
665         case IRQ_TYPE_FSLINT:
666                 opp->src[n_IRQ].ivpr &= ~IVPR_SENSE_MASK;
667                 break;
668
669         case IRQ_TYPE_FSLSPECIAL:
670                 opp->src[n_IRQ].ivpr &= ~(IVPR_POLARITY_MASK | IVPR_SENSE_MASK);
671                 break;
672         }
673
674         openpic_update_irq(opp, n_IRQ);
675         pr_debug("Set IVPR %d to 0x%08x -> 0x%08x\n", n_IRQ, val,
676                 opp->src[n_IRQ].ivpr);
677 }
678
679 static void openpic_gcr_write(struct openpic *opp, uint64_t val)
680 {
681         if (val & GCR_RESET) {
682                 openpic_reset(opp);
683                 return;
684         }
685
686         opp->gcr &= ~opp->mpic_mode_mask;
687         opp->gcr |= val & opp->mpic_mode_mask;
688 }
689
690 static int openpic_gbl_write(void *opaque, gpa_t addr, u32 val)
691 {
692         struct openpic *opp = opaque;
693         int err = 0;
694
695         pr_debug("%s: addr %#llx <= %08x\n", __func__, addr, val);
696         if (addr & 0xF)
697                 return 0;
698
699         switch (addr) {
700         case 0x00:      /* Block Revision Register1 (BRR1) is Readonly */
701                 break;
702         case 0x40:
703         case 0x50:
704         case 0x60:
705         case 0x70:
706         case 0x80:
707         case 0x90:
708         case 0xA0:
709         case 0xB0:
710                 err = openpic_cpu_write_internal(opp, addr, val,
711                                                  get_current_cpu());
712                 break;
713         case 0x1000:            /* FRR */
714                 break;
715         case 0x1020:            /* GCR */
716                 openpic_gcr_write(opp, val);
717                 break;
718         case 0x1080:            /* VIR */
719                 break;
720         case 0x1090:            /* PIR */
721                 /*
722                  * This register is used to reset a CPU core --
723                  * let userspace handle it.
724                  */
725                 err = -ENXIO;
726                 break;
727         case 0x10A0:            /* IPI_IVPR */
728         case 0x10B0:
729         case 0x10C0:
730         case 0x10D0: {
731                 int idx;
732                 idx = (addr - 0x10A0) >> 4;
733                 write_IRQreg_ivpr(opp, opp->irq_ipi0 + idx, val);
734                 break;
735         }
736         case 0x10E0:            /* SPVE */
737                 opp->spve = val & opp->vector_mask;
738                 break;
739         default:
740                 break;
741         }
742
743         return err;
744 }
745
746 static int openpic_gbl_read(void *opaque, gpa_t addr, u32 *ptr)
747 {
748         struct openpic *opp = opaque;
749         u32 retval;
750         int err = 0;
751
752         pr_debug("%s: addr %#llx\n", __func__, addr);
753         retval = 0xFFFFFFFF;
754         if (addr & 0xF)
755                 goto out;
756
757         switch (addr) {
758         case 0x1000:            /* FRR */
759                 retval = opp->frr;
760                 retval |= (opp->nb_cpus - 1) << FRR_NCPU_SHIFT;
761                 break;
762         case 0x1020:            /* GCR */
763                 retval = opp->gcr;
764                 break;
765         case 0x1080:            /* VIR */
766                 retval = opp->vir;
767                 break;
768         case 0x1090:            /* PIR */
769                 retval = 0x00000000;
770                 break;
771         case 0x00:              /* Block Revision Register1 (BRR1) */
772                 retval = opp->brr1;
773                 break;
774         case 0x40:
775         case 0x50:
776         case 0x60:
777         case 0x70:
778         case 0x80:
779         case 0x90:
780         case 0xA0:
781         case 0xB0:
782                 err = openpic_cpu_read_internal(opp, addr,
783                         &retval, get_current_cpu());
784                 break;
785         case 0x10A0:            /* IPI_IVPR */
786         case 0x10B0:
787         case 0x10C0:
788         case 0x10D0:
789                 {
790                         int idx;
791                         idx = (addr - 0x10A0) >> 4;
792                         retval = read_IRQreg_ivpr(opp, opp->irq_ipi0 + idx);
793                 }
794                 break;
795         case 0x10E0:            /* SPVE */
796                 retval = opp->spve;
797                 break;
798         default:
799                 break;
800         }
801
802 out:
803         pr_debug("%s: => 0x%08x\n", __func__, retval);
804         *ptr = retval;
805         return err;
806 }
807
808 static int openpic_tmr_write(void *opaque, gpa_t addr, u32 val)
809 {
810         struct openpic *opp = opaque;
811         int idx;
812
813         addr += 0x10f0;
814
815         pr_debug("%s: addr %#llx <= %08x\n", __func__, addr, val);
816         if (addr & 0xF)
817                 return 0;
818
819         if (addr == 0x10f0) {
820                 /* TFRR */
821                 opp->tfrr = val;
822                 return 0;
823         }
824
825         idx = (addr >> 6) & 0x3;
826         addr = addr & 0x30;
827
828         switch (addr & 0x30) {
829         case 0x00:              /* TCCR */
830                 break;
831         case 0x10:              /* TBCR */
832                 if ((opp->timers[idx].tccr & TCCR_TOG) != 0 &&
833                     (val & TBCR_CI) == 0 &&
834                     (opp->timers[idx].tbcr & TBCR_CI) != 0)
835                         opp->timers[idx].tccr &= ~TCCR_TOG;
836
837                 opp->timers[idx].tbcr = val;
838                 break;
839         case 0x20:              /* TVPR */
840                 write_IRQreg_ivpr(opp, opp->irq_tim0 + idx, val);
841                 break;
842         case 0x30:              /* TDR */
843                 write_IRQreg_idr(opp, opp->irq_tim0 + idx, val);
844                 break;
845         }
846
847         return 0;
848 }
849
850 static int openpic_tmr_read(void *opaque, gpa_t addr, u32 *ptr)
851 {
852         struct openpic *opp = opaque;
853         uint32_t retval = -1;
854         int idx;
855
856         pr_debug("%s: addr %#llx\n", __func__, addr);
857         if (addr & 0xF)
858                 goto out;
859
860         idx = (addr >> 6) & 0x3;
861         if (addr == 0x0) {
862                 /* TFRR */
863                 retval = opp->tfrr;
864                 goto out;
865         }
866
867         switch (addr & 0x30) {
868         case 0x00:              /* TCCR */
869                 retval = opp->timers[idx].tccr;
870                 break;
871         case 0x10:              /* TBCR */
872                 retval = opp->timers[idx].tbcr;
873                 break;
874         case 0x20:              /* TIPV */
875                 retval = read_IRQreg_ivpr(opp, opp->irq_tim0 + idx);
876                 break;
877         case 0x30:              /* TIDE (TIDR) */
878                 retval = read_IRQreg_idr(opp, opp->irq_tim0 + idx);
879                 break;
880         }
881
882 out:
883         pr_debug("%s: => 0x%08x\n", __func__, retval);
884         *ptr = retval;
885         return 0;
886 }
887
888 static int openpic_src_write(void *opaque, gpa_t addr, u32 val)
889 {
890         struct openpic *opp = opaque;
891         int idx;
892
893         pr_debug("%s: addr %#llx <= %08x\n", __func__, addr, val);
894
895         addr = addr & 0xffff;
896         idx = addr >> 5;
897
898         switch (addr & 0x1f) {
899         case 0x00:
900                 write_IRQreg_ivpr(opp, idx, val);
901                 break;
902         case 0x10:
903                 write_IRQreg_idr(opp, idx, val);
904                 break;
905         case 0x18:
906                 write_IRQreg_ilr(opp, idx, val);
907                 break;
908         }
909
910         return 0;
911 }
912
913 static int openpic_src_read(void *opaque, gpa_t addr, u32 *ptr)
914 {
915         struct openpic *opp = opaque;
916         uint32_t retval;
917         int idx;
918
919         pr_debug("%s: addr %#llx\n", __func__, addr);
920         retval = 0xFFFFFFFF;
921
922         addr = addr & 0xffff;
923         idx = addr >> 5;
924
925         switch (addr & 0x1f) {
926         case 0x00:
927                 retval = read_IRQreg_ivpr(opp, idx);
928                 break;
929         case 0x10:
930                 retval = read_IRQreg_idr(opp, idx);
931                 break;
932         case 0x18:
933                 retval = read_IRQreg_ilr(opp, idx);
934                 break;
935         }
936
937         pr_debug("%s: => 0x%08x\n", __func__, retval);
938         *ptr = retval;
939         return 0;
940 }
941
942 static int openpic_msi_write(void *opaque, gpa_t addr, u32 val)
943 {
944         struct openpic *opp = opaque;
945         int idx = opp->irq_msi;
946         int srs, ibs;
947
948         pr_debug("%s: addr %#llx <= 0x%08x\n", __func__, addr, val);
949         if (addr & 0xF)
950                 return 0;
951
952         switch (addr) {
953         case MSIIR_OFFSET:
954                 srs = val >> MSIIR_SRS_SHIFT;
955                 idx += srs;
956                 ibs = (val & MSIIR_IBS_MASK) >> MSIIR_IBS_SHIFT;
957                 opp->msi[srs].msir |= 1 << ibs;
958                 openpic_set_irq(opp, idx, 1);
959                 break;
960         default:
961                 /* most registers are read-only, thus ignored */
962                 break;
963         }
964
965         return 0;
966 }
967
968 static int openpic_msi_read(void *opaque, gpa_t addr, u32 *ptr)
969 {
970         struct openpic *opp = opaque;
971         uint32_t r = 0;
972         int i, srs;
973
974         pr_debug("%s: addr %#llx\n", __func__, addr);
975         if (addr & 0xF)
976                 return -ENXIO;
977
978         srs = addr >> 4;
979
980         switch (addr) {
981         case 0x00:
982         case 0x10:
983         case 0x20:
984         case 0x30:
985         case 0x40:
986         case 0x50:
987         case 0x60:
988         case 0x70:              /* MSIRs */
989                 r = opp->msi[srs].msir;
990                 /* Clear on read */
991                 opp->msi[srs].msir = 0;
992                 openpic_set_irq(opp, opp->irq_msi + srs, 0);
993                 break;
994         case 0x120:             /* MSISR */
995                 for (i = 0; i < MAX_MSI; i++)
996                         r |= (opp->msi[i].msir ? 1 : 0) << i;
997                 break;
998         }
999
1000         pr_debug("%s: => 0x%08x\n", __func__, r);
1001         *ptr = r;
1002         return 0;
1003 }
1004
1005 static int openpic_summary_read(void *opaque, gpa_t addr, u32 *ptr)
1006 {
1007         uint32_t r = 0;
1008
1009         pr_debug("%s: addr %#llx\n", __func__, addr);
1010
1011         /* TODO: EISR/EIMR */
1012
1013         *ptr = r;
1014         return 0;
1015 }
1016
1017 static int openpic_summary_write(void *opaque, gpa_t addr, u32 val)
1018 {
1019         pr_debug("%s: addr %#llx <= 0x%08x\n", __func__, addr, val);
1020
1021         /* TODO: EISR/EIMR */
1022         return 0;
1023 }
1024
1025 static int openpic_cpu_write_internal(void *opaque, gpa_t addr,
1026                                       u32 val, int idx)
1027 {
1028         struct openpic *opp = opaque;
1029         struct irq_source *src;
1030         struct irq_dest *dst;
1031         int s_IRQ, n_IRQ;
1032
1033         pr_debug("%s: cpu %d addr %#llx <= 0x%08x\n", __func__, idx,
1034                 addr, val);
1035
1036         if (idx < 0)
1037                 return 0;
1038
1039         if (addr & 0xF)
1040                 return 0;
1041
1042         dst = &opp->dst[idx];
1043         addr &= 0xFF0;
1044         switch (addr) {
1045         case 0x40:              /* IPIDR */
1046         case 0x50:
1047         case 0x60:
1048         case 0x70:
1049                 idx = (addr - 0x40) >> 4;
1050                 /* we use IDE as mask which CPUs to deliver the IPI to still. */
1051                 opp->src[opp->irq_ipi0 + idx].destmask |= val;
1052                 openpic_set_irq(opp, opp->irq_ipi0 + idx, 1);
1053                 openpic_set_irq(opp, opp->irq_ipi0 + idx, 0);
1054                 break;
1055         case 0x80:              /* CTPR */
1056                 dst->ctpr = val & 0x0000000F;
1057
1058                 pr_debug("%s: set CPU %d ctpr to %d, raised %d servicing %d\n",
1059                         __func__, idx, dst->ctpr, dst->raised.priority,
1060                         dst->servicing.priority);
1061
1062                 if (dst->raised.priority <= dst->ctpr) {
1063                         pr_debug("%s: Lower OpenPIC INT output cpu %d due to ctpr\n",
1064                                 __func__, idx);
1065                         mpic_irq_lower(opp, dst, ILR_INTTGT_INT);
1066                 } else if (dst->raised.priority > dst->servicing.priority) {
1067                         pr_debug("%s: Raise OpenPIC INT output cpu %d irq %d\n",
1068                                 __func__, idx, dst->raised.next);
1069                         mpic_irq_raise(opp, dst, ILR_INTTGT_INT);
1070                 }
1071
1072                 break;
1073         case 0x90:              /* WHOAMI */
1074                 /* Read-only register */
1075                 break;
1076         case 0xA0:              /* IACK */
1077                 /* Read-only register */
1078                 break;
1079         case 0xB0: {            /* EOI */
1080                 int notify_eoi;
1081
1082                 pr_debug("EOI\n");
1083                 s_IRQ = IRQ_get_next(opp, &dst->servicing);
1084
1085                 if (s_IRQ < 0) {
1086                         pr_debug("%s: EOI with no interrupt in service\n",
1087                                 __func__);
1088                         break;
1089                 }
1090
1091                 IRQ_resetbit(&dst->servicing, s_IRQ);
1092                 /* Notify listeners that the IRQ is over */
1093                 notify_eoi = s_IRQ;
1094                 /* Set up next servicing IRQ */
1095                 s_IRQ = IRQ_get_next(opp, &dst->servicing);
1096                 /* Check queued interrupts. */
1097                 n_IRQ = IRQ_get_next(opp, &dst->raised);
1098                 src = &opp->src[n_IRQ];
1099                 if (n_IRQ != -1 &&
1100                     (s_IRQ == -1 ||
1101                      IVPR_PRIORITY(src->ivpr) > dst->servicing.priority)) {
1102                         pr_debug("Raise OpenPIC INT output cpu %d irq %d\n",
1103                                 idx, n_IRQ);
1104                         mpic_irq_raise(opp, dst, ILR_INTTGT_INT);
1105                 }
1106
1107                 spin_unlock(&opp->lock);
1108                 kvm_notify_acked_irq(opp->kvm, 0, notify_eoi);
1109                 spin_lock(&opp->lock);
1110
1111                 break;
1112         }
1113         default:
1114                 break;
1115         }
1116
1117         return 0;
1118 }
1119
1120 static int openpic_cpu_write(void *opaque, gpa_t addr, u32 val)
1121 {
1122         struct openpic *opp = opaque;
1123
1124         return openpic_cpu_write_internal(opp, addr, val,
1125                                          (addr & 0x1f000) >> 12);
1126 }
1127
1128 static uint32_t openpic_iack(struct openpic *opp, struct irq_dest *dst,
1129                              int cpu)
1130 {
1131         struct irq_source *src;
1132         int retval, irq;
1133
1134         pr_debug("Lower OpenPIC INT output\n");
1135         mpic_irq_lower(opp, dst, ILR_INTTGT_INT);
1136
1137         irq = IRQ_get_next(opp, &dst->raised);
1138         pr_debug("IACK: irq=%d\n", irq);
1139
1140         if (irq == -1)
1141                 /* No more interrupt pending */
1142                 return opp->spve;
1143
1144         src = &opp->src[irq];
1145         if (!(src->ivpr & IVPR_ACTIVITY_MASK) ||
1146             !(IVPR_PRIORITY(src->ivpr) > dst->ctpr)) {
1147                 pr_err("%s: bad raised IRQ %d ctpr %d ivpr 0x%08x\n",
1148                         __func__, irq, dst->ctpr, src->ivpr);
1149                 openpic_update_irq(opp, irq);
1150                 retval = opp->spve;
1151         } else {
1152                 /* IRQ enter servicing state */
1153                 IRQ_setbit(&dst->servicing, irq);
1154                 retval = IVPR_VECTOR(opp, src->ivpr);
1155         }
1156
1157         if (!src->level) {
1158                 /* edge-sensitive IRQ */
1159                 src->ivpr &= ~IVPR_ACTIVITY_MASK;
1160                 src->pending = 0;
1161                 IRQ_resetbit(&dst->raised, irq);
1162         }
1163
1164         if ((irq >= opp->irq_ipi0) && (irq < (opp->irq_ipi0 + MAX_IPI))) {
1165                 src->destmask &= ~(1 << cpu);
1166                 if (src->destmask && !src->level) {
1167                         /* trigger on CPUs that didn't know about it yet */
1168                         openpic_set_irq(opp, irq, 1);
1169                         openpic_set_irq(opp, irq, 0);
1170                         /* if all CPUs knew about it, set active bit again */
1171                         src->ivpr |= IVPR_ACTIVITY_MASK;
1172                 }
1173         }
1174
1175         return retval;
1176 }
1177
1178 void kvmppc_mpic_set_epr(struct kvm_vcpu *vcpu)
1179 {
1180         struct openpic *opp = vcpu->arch.mpic;
1181         int cpu = vcpu->arch.irq_cpu_id;
1182         unsigned long flags;
1183
1184         spin_lock_irqsave(&opp->lock, flags);
1185
1186         if ((opp->gcr & opp->mpic_mode_mask) == GCR_MODE_PROXY)
1187                 kvmppc_set_epr(vcpu, openpic_iack(opp, &opp->dst[cpu], cpu));
1188
1189         spin_unlock_irqrestore(&opp->lock, flags);
1190 }
1191
1192 static int openpic_cpu_read_internal(void *opaque, gpa_t addr,
1193                                      u32 *ptr, int idx)
1194 {
1195         struct openpic *opp = opaque;
1196         struct irq_dest *dst;
1197         uint32_t retval;
1198
1199         pr_debug("%s: cpu %d addr %#llx\n", __func__, idx, addr);
1200         retval = 0xFFFFFFFF;
1201
1202         if (idx < 0)
1203                 goto out;
1204
1205         if (addr & 0xF)
1206                 goto out;
1207
1208         dst = &opp->dst[idx];
1209         addr &= 0xFF0;
1210         switch (addr) {
1211         case 0x80:              /* CTPR */
1212                 retval = dst->ctpr;
1213                 break;
1214         case 0x90:              /* WHOAMI */
1215                 retval = idx;
1216                 break;
1217         case 0xA0:              /* IACK */
1218                 retval = openpic_iack(opp, dst, idx);
1219                 break;
1220         case 0xB0:              /* EOI */
1221                 retval = 0;
1222                 break;
1223         default:
1224                 break;
1225         }
1226         pr_debug("%s: => 0x%08x\n", __func__, retval);
1227
1228 out:
1229         *ptr = retval;
1230         return 0;
1231 }
1232
1233 static int openpic_cpu_read(void *opaque, gpa_t addr, u32 *ptr)
1234 {
1235         struct openpic *opp = opaque;
1236
1237         return openpic_cpu_read_internal(opp, addr, ptr,
1238                                          (addr & 0x1f000) >> 12);
1239 }
1240
/*
 * One register window inside the MPIC register space.  Windows are linked
 * on an openpic's mmio_regions list and searched by address when a
 * register access is dispatched.
 */
struct mem_reg {
        /* link in the owning openpic's mmio_regions list */
        struct list_head list;
        /* handlers; addr is the offset from start_addr, opaque the openpic */
        int (*read)(void *opaque, gpa_t addr, u32 *ptr);
        int (*write)(void *opaque, gpa_t addr, u32 val);
        /* first offset covered by the window, relative to the MPIC base */
        gpa_t start_addr;
        /* length of the window in bytes */
        int size;
};
1248
1249 static struct mem_reg openpic_gbl_mmio = {
1250         .write = openpic_gbl_write,
1251         .read = openpic_gbl_read,
1252         .start_addr = OPENPIC_GLB_REG_START,
1253         .size = OPENPIC_GLB_REG_SIZE,
1254 };
1255
1256 static struct mem_reg openpic_tmr_mmio = {
1257         .write = openpic_tmr_write,
1258         .read = openpic_tmr_read,
1259         .start_addr = OPENPIC_TMR_REG_START,
1260         .size = OPENPIC_TMR_REG_SIZE,
1261 };
1262
1263 static struct mem_reg openpic_cpu_mmio = {
1264         .write = openpic_cpu_write,
1265         .read = openpic_cpu_read,
1266         .start_addr = OPENPIC_CPU_REG_START,
1267         .size = OPENPIC_CPU_REG_SIZE,
1268 };
1269
1270 static struct mem_reg openpic_src_mmio = {
1271         .write = openpic_src_write,
1272         .read = openpic_src_read,
1273         .start_addr = OPENPIC_SRC_REG_START,
1274         .size = OPENPIC_SRC_REG_SIZE,
1275 };
1276
1277 static struct mem_reg openpic_msi_mmio = {
1278         .read = openpic_msi_read,
1279         .write = openpic_msi_write,
1280         .start_addr = OPENPIC_MSI_REG_START,
1281         .size = OPENPIC_MSI_REG_SIZE,
1282 };
1283
1284 static struct mem_reg openpic_summary_mmio = {
1285         .read = openpic_summary_read,
1286         .write = openpic_summary_write,
1287         .start_addr = OPENPIC_SUMMARY_REG_START,
1288         .size = OPENPIC_SUMMARY_REG_SIZE,
1289 };
1290
1291 static void fsl_common_init(struct openpic *opp)
1292 {
1293         int i;
1294         int virq = MAX_SRC;
1295
1296         list_add(&openpic_msi_mmio.list, &opp->mmio_regions);
1297         list_add(&openpic_summary_mmio.list, &opp->mmio_regions);
1298
1299         opp->vid = VID_REVISION_1_2;
1300         opp->vir = VIR_GENERIC;
1301         opp->vector_mask = 0xFFFF;
1302         opp->tfrr_reset = 0;
1303         opp->ivpr_reset = IVPR_MASK_MASK;
1304         opp->idr_reset = 1 << 0;
1305         opp->max_irq = MAX_IRQ;
1306
1307         opp->irq_ipi0 = virq;
1308         virq += MAX_IPI;
1309         opp->irq_tim0 = virq;
1310         virq += MAX_TMR;
1311
1312         BUG_ON(virq > MAX_IRQ);
1313
1314         opp->irq_msi = 224;
1315
1316         for (i = 0; i < opp->fsl->max_ext; i++)
1317                 opp->src[i].level = false;
1318
1319         /* Internal interrupts, including message and MSI */
1320         for (i = 16; i < MAX_SRC; i++) {
1321                 opp->src[i].type = IRQ_TYPE_FSLINT;
1322                 opp->src[i].level = true;
1323         }
1324
1325         /* timers and IPIs */
1326         for (i = MAX_SRC; i < virq; i++) {
1327                 opp->src[i].type = IRQ_TYPE_FSLSPECIAL;
1328                 opp->src[i].level = false;
1329         }
1330 }
1331
1332 static int kvm_mpic_read_internal(struct openpic *opp, gpa_t addr, u32 *ptr)
1333 {
1334         struct list_head *node;
1335
1336         list_for_each(node, &opp->mmio_regions) {
1337                 struct mem_reg *mr = list_entry(node, struct mem_reg, list);
1338
1339                 if (mr->start_addr > addr || addr >= mr->start_addr + mr->size)
1340                         continue;
1341
1342                 return mr->read(opp, addr - mr->start_addr, ptr);
1343         }
1344
1345         return -ENXIO;
1346 }
1347
1348 static int kvm_mpic_write_internal(struct openpic *opp, gpa_t addr, u32 val)
1349 {
1350         struct list_head *node;
1351
1352         list_for_each(node, &opp->mmio_regions) {
1353                 struct mem_reg *mr = list_entry(node, struct mem_reg, list);
1354
1355                 if (mr->start_addr > addr || addr >= mr->start_addr + mr->size)
1356                         continue;
1357
1358                 return mr->write(opp, addr - mr->start_addr, val);
1359         }
1360
1361         return -ENXIO;
1362 }
1363
1364 static int kvm_mpic_read(struct kvm_io_device *this, gpa_t addr,
1365                          int len, void *ptr)
1366 {
1367         struct openpic *opp = container_of(this, struct openpic, mmio);
1368         int ret;
1369         union {
1370                 u32 val;
1371                 u8 bytes[4];
1372         } u;
1373
1374         if (addr & (len - 1)) {
1375                 pr_debug("%s: bad alignment %llx/%d\n",
1376                          __func__, addr, len);
1377                 return -EINVAL;
1378         }
1379
1380         spin_lock_irq(&opp->lock);
1381         ret = kvm_mpic_read_internal(opp, addr - opp->reg_base, &u.val);
1382         spin_unlock_irq(&opp->lock);
1383
1384         /*
1385          * Technically only 32-bit accesses are allowed, but be nice to
1386          * people dumping registers a byte at a time -- it works in real
1387          * hardware (reads only, not writes).
1388          */
1389         if (len == 4) {
1390                 *(u32 *)ptr = u.val;
1391                 pr_debug("%s: addr %llx ret %d len 4 val %x\n",
1392                          __func__, addr, ret, u.val);
1393         } else if (len == 1) {
1394                 *(u8 *)ptr = u.bytes[addr & 3];
1395                 pr_debug("%s: addr %llx ret %d len 1 val %x\n",
1396                          __func__, addr, ret, u.bytes[addr & 3]);
1397         } else {
1398                 pr_debug("%s: bad length %d\n", __func__, len);
1399                 return -EINVAL;
1400         }
1401
1402         return ret;
1403 }
1404
1405 static int kvm_mpic_write(struct kvm_io_device *this, gpa_t addr,
1406                           int len, const void *ptr)
1407 {
1408         struct openpic *opp = container_of(this, struct openpic, mmio);
1409         int ret;
1410
1411         if (len != 4) {
1412                 pr_debug("%s: bad length %d\n", __func__, len);
1413                 return -EOPNOTSUPP;
1414         }
1415         if (addr & 3) {
1416                 pr_debug("%s: bad alignment %llx/%d\n", __func__, addr, len);
1417                 return -EOPNOTSUPP;
1418         }
1419
1420         spin_lock_irq(&opp->lock);
1421         ret = kvm_mpic_write_internal(opp, addr - opp->reg_base,
1422                                       *(const u32 *)ptr);
1423         spin_unlock_irq(&opp->lock);
1424
1425         pr_debug("%s: addr %llx ret %d val %x\n",
1426                  __func__, addr, ret, *(const u32 *)ptr);
1427
1428         return ret;
1429 }
1430
1431 static void kvm_mpic_dtor(struct kvm_io_device *this)
1432 {
1433         struct openpic *opp = container_of(this, struct openpic, mmio);
1434
1435         opp->mmio_mapped = false;
1436 }
1437
1438 static const struct kvm_io_device_ops mpic_mmio_ops = {
1439         .read = kvm_mpic_read,
1440         .write = kvm_mpic_write,
1441         .destructor = kvm_mpic_dtor,
1442 };
1443
1444 static void map_mmio(struct openpic *opp)
1445 {
1446         BUG_ON(opp->mmio_mapped);
1447         opp->mmio_mapped = true;
1448
1449         kvm_iodevice_init(&opp->mmio, &mpic_mmio_ops);
1450
1451         kvm_io_bus_register_dev(opp->kvm, KVM_MMIO_BUS,
1452                                 opp->reg_base, OPENPIC_REG_SIZE,
1453                                 &opp->mmio);
1454 }
1455
1456 static void unmap_mmio(struct openpic *opp)
1457 {
1458         if (opp->mmio_mapped) {
1459                 opp->mmio_mapped = false;
1460                 kvm_io_bus_unregister_dev(opp->kvm, KVM_MMIO_BUS, &opp->mmio);
1461         }
1462 }
1463
1464 static int set_base_addr(struct openpic *opp, struct kvm_device_attr *attr)
1465 {
1466         u64 base;
1467
1468         if (copy_from_user(&base, (u64 __user *)(long)attr->addr, sizeof(u64)))
1469                 return -EFAULT;
1470
1471         if (base & 0x3ffff) {
1472                 pr_debug("kvm mpic %s: KVM_DEV_MPIC_BASE_ADDR %08llx not aligned\n",
1473                          __func__, base);
1474                 return -EINVAL;
1475         }
1476
1477         if (base == opp->reg_base)
1478                 return 0;
1479
1480         mutex_lock(&opp->kvm->slots_lock);
1481
1482         unmap_mmio(opp);
1483         opp->reg_base = base;
1484
1485         pr_debug("kvm mpic %s: KVM_DEV_MPIC_BASE_ADDR %08llx\n",
1486                  __func__, base);
1487
1488         if (base == 0)
1489                 goto out;
1490
1491         map_mmio(opp);
1492
1493         mutex_unlock(&opp->kvm->slots_lock);
1494 out:
1495         return 0;
1496 }
1497
1498 #define ATTR_SET                0
1499 #define ATTR_GET                1
1500
1501 static int access_reg(struct openpic *opp, gpa_t addr, u32 *val, int type)
1502 {
1503         int ret;
1504
1505         if (addr & 3)
1506                 return -ENXIO;
1507
1508         spin_lock_irq(&opp->lock);
1509
1510         if (type == ATTR_SET)
1511                 ret = kvm_mpic_write_internal(opp, addr, *val);
1512         else
1513                 ret = kvm_mpic_read_internal(opp, addr, val);
1514
1515         spin_unlock_irq(&opp->lock);
1516
1517         pr_debug("%s: type %d addr %llx val %x\n", __func__, type, addr, *val);
1518
1519         return ret;
1520 }
1521
1522 static int mpic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
1523 {
1524         struct openpic *opp = dev->private;
1525         u32 attr32;
1526
1527         switch (attr->group) {
1528         case KVM_DEV_MPIC_GRP_MISC:
1529                 switch (attr->attr) {
1530                 case KVM_DEV_MPIC_BASE_ADDR:
1531                         return set_base_addr(opp, attr);
1532                 }
1533
1534                 break;
1535
1536         case KVM_DEV_MPIC_GRP_REGISTER:
1537                 if (get_user(attr32, (u32 __user *)(long)attr->addr))
1538                         return -EFAULT;
1539
1540                 return access_reg(opp, attr->attr, &attr32, ATTR_SET);
1541
1542         case KVM_DEV_MPIC_GRP_IRQ_ACTIVE:
1543                 if (attr->attr > MAX_SRC)
1544                         return -EINVAL;
1545
1546                 if (get_user(attr32, (u32 __user *)(long)attr->addr))
1547                         return -EFAULT;
1548
1549                 if (attr32 != 0 && attr32 != 1)
1550                         return -EINVAL;
1551
1552                 spin_lock_irq(&opp->lock);
1553                 openpic_set_irq(opp, attr->attr, attr32);
1554                 spin_unlock_irq(&opp->lock);
1555                 return 0;
1556         }
1557
1558         return -ENXIO;
1559 }
1560
1561 static int mpic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
1562 {
1563         struct openpic *opp = dev->private;
1564         u64 attr64;
1565         u32 attr32;
1566         int ret;
1567
1568         switch (attr->group) {
1569         case KVM_DEV_MPIC_GRP_MISC:
1570                 switch (attr->attr) {
1571                 case KVM_DEV_MPIC_BASE_ADDR:
1572                         mutex_lock(&opp->kvm->slots_lock);
1573                         attr64 = opp->reg_base;
1574                         mutex_unlock(&opp->kvm->slots_lock);
1575
1576                         if (copy_to_user((u64 __user *)(long)attr->addr,
1577                                          &attr64, sizeof(u64)))
1578                                 return -EFAULT;
1579
1580                         return 0;
1581                 }
1582
1583                 break;
1584
1585         case KVM_DEV_MPIC_GRP_REGISTER:
1586                 ret = access_reg(opp, attr->attr, &attr32, ATTR_GET);
1587                 if (ret)
1588                         return ret;
1589
1590                 if (put_user(attr32, (u32 __user *)(long)attr->addr))
1591                         return -EFAULT;
1592
1593                 return 0;
1594
1595         case KVM_DEV_MPIC_GRP_IRQ_ACTIVE:
1596                 if (attr->attr > MAX_SRC)
1597                         return -EINVAL;
1598
1599                 spin_lock_irq(&opp->lock);
1600                 attr32 = opp->src[attr->attr].pending;
1601                 spin_unlock_irq(&opp->lock);
1602
1603                 if (put_user(attr32, (u32 __user *)(long)attr->addr))
1604                         return -EFAULT;
1605
1606                 return 0;
1607         }
1608
1609         return -ENXIO;
1610 }
1611
1612 static int mpic_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
1613 {
1614         switch (attr->group) {
1615         case KVM_DEV_MPIC_GRP_MISC:
1616                 switch (attr->attr) {
1617                 case KVM_DEV_MPIC_BASE_ADDR:
1618                         return 0;
1619                 }
1620
1621                 break;
1622
1623         case KVM_DEV_MPIC_GRP_REGISTER:
1624                 return 0;
1625
1626         case KVM_DEV_MPIC_GRP_IRQ_ACTIVE:
1627                 if (attr->attr > MAX_SRC)
1628                         break;
1629
1630                 return 0;
1631         }
1632
1633         return -ENXIO;
1634 }
1635
1636 static void mpic_destroy(struct kvm_device *dev)
1637 {
1638         struct openpic *opp = dev->private;
1639
1640         if (opp->mmio_mapped) {
1641                 /*
1642                  * Normally we get unmapped by kvm_io_bus_destroy(),
1643                  * which happens before the VCPUs release their references.
1644                  *
1645                  * Thus, we should only get here if no VCPUs took a reference
1646                  * to us in the first place.
1647                  */
1648                 WARN_ON(opp->nb_cpus != 0);
1649                 unmap_mmio(opp);
1650         }
1651
1652         dev->kvm->arch.mpic = NULL;
1653         kfree(opp);
1654 }
1655
1656 static int mpic_set_default_irq_routing(struct openpic *opp)
1657 {
1658         struct kvm_irq_routing_entry *routing;
1659
1660         /* Create a nop default map, so that dereferencing it still works */
1661         routing = kzalloc((sizeof(*routing)), GFP_KERNEL);
1662         if (!routing)
1663                 return -ENOMEM;
1664
1665         kvm_set_irq_routing(opp->kvm, routing, 0, 0);
1666
1667         kfree(routing);
1668         return 0;
1669 }
1670
1671 static int mpic_create(struct kvm_device *dev, u32 type)
1672 {
1673         struct openpic *opp;
1674         int ret;
1675
1676         /* We only support one MPIC at a time for now */
1677         if (dev->kvm->arch.mpic)
1678                 return -EINVAL;
1679
1680         opp = kzalloc(sizeof(struct openpic), GFP_KERNEL);
1681         if (!opp)
1682                 return -ENOMEM;
1683
1684         dev->private = opp;
1685         opp->kvm = dev->kvm;
1686         opp->dev = dev;
1687         opp->model = type;
1688         spin_lock_init(&opp->lock);
1689
1690         INIT_LIST_HEAD(&opp->mmio_regions);
1691         list_add(&openpic_gbl_mmio.list, &opp->mmio_regions);
1692         list_add(&openpic_tmr_mmio.list, &opp->mmio_regions);
1693         list_add(&openpic_src_mmio.list, &opp->mmio_regions);
1694         list_add(&openpic_cpu_mmio.list, &opp->mmio_regions);
1695
1696         switch (opp->model) {
1697         case KVM_DEV_TYPE_FSL_MPIC_20:
1698                 opp->fsl = &fsl_mpic_20;
1699                 opp->brr1 = 0x00400200;
1700                 opp->flags |= OPENPIC_FLAG_IDR_CRIT;
1701                 opp->nb_irqs = 80;
1702                 opp->mpic_mode_mask = GCR_MODE_MIXED;
1703
1704                 fsl_common_init(opp);
1705
1706                 break;
1707
1708         case KVM_DEV_TYPE_FSL_MPIC_42:
1709                 opp->fsl = &fsl_mpic_42;
1710                 opp->brr1 = 0x00400402;
1711                 opp->flags |= OPENPIC_FLAG_ILR;
1712                 opp->nb_irqs = 196;
1713                 opp->mpic_mode_mask = GCR_MODE_PROXY;
1714
1715                 fsl_common_init(opp);
1716
1717                 break;
1718
1719         default:
1720                 ret = -ENODEV;
1721                 goto err;
1722         }
1723
1724         ret = mpic_set_default_irq_routing(opp);
1725         if (ret)
1726                 goto err;
1727
1728         openpic_reset(opp);
1729
1730         smp_wmb();
1731         dev->kvm->arch.mpic = opp;
1732
1733         return 0;
1734
1735 err:
1736         kfree(opp);
1737         return ret;
1738 }
1739
1740 struct kvm_device_ops kvm_mpic_ops = {
1741         .name = "kvm-mpic",
1742         .create = mpic_create,
1743         .destroy = mpic_destroy,
1744         .set_attr = mpic_set_attr,
1745         .get_attr = mpic_get_attr,
1746         .has_attr = mpic_has_attr,
1747 };
1748
1749 int kvmppc_mpic_connect_vcpu(struct kvm_device *dev, struct kvm_vcpu *vcpu,
1750                              u32 cpu)
1751 {
1752         struct openpic *opp = dev->private;
1753         int ret = 0;
1754
1755         if (dev->ops != &kvm_mpic_ops)
1756                 return -EPERM;
1757         if (opp->kvm != vcpu->kvm)
1758                 return -EPERM;
1759         if (cpu < 0 || cpu >= MAX_CPU)
1760                 return -EPERM;
1761
1762         spin_lock_irq(&opp->lock);
1763
1764         if (opp->dst[cpu].vcpu) {
1765                 ret = -EEXIST;
1766                 goto out;
1767         }
1768         if (vcpu->arch.irq_type) {
1769                 ret = -EBUSY;
1770                 goto out;
1771         }
1772
1773         opp->dst[cpu].vcpu = vcpu;
1774         opp->nb_cpus = max(opp->nb_cpus, cpu + 1);
1775
1776         vcpu->arch.mpic = opp;
1777         vcpu->arch.irq_cpu_id = cpu;
1778         vcpu->arch.irq_type = KVMPPC_IRQ_MPIC;
1779
1780         /* This might need to be changed if GCR gets extended */
1781         if (opp->mpic_mode_mask == GCR_MODE_PROXY)
1782                 vcpu->arch.epr_flags |= KVMPPC_EPR_KERNEL;
1783
1784 out:
1785         spin_unlock_irq(&opp->lock);
1786         return ret;
1787 }
1788
1789 /*
1790  * This should only happen immediately before the mpic is destroyed,
1791  * so we shouldn't need to worry about anything still trying to
1792  * access the vcpu pointer.
1793  */
1794 void kvmppc_mpic_disconnect_vcpu(struct openpic *opp, struct kvm_vcpu *vcpu)
1795 {
1796         BUG_ON(!opp->dst[vcpu->arch.irq_cpu_id].vcpu);
1797
1798         opp->dst[vcpu->arch.irq_cpu_id].vcpu = NULL;
1799 }
1800
1801 /*
1802  * Return value:
1803  *  < 0   Interrupt was ignored (masked or not delivered for other reasons)
1804  *  = 0   Interrupt was coalesced (previous irq is still pending)
1805  *  > 0   Number of CPUs interrupt was delivered to
1806  */
1807 static int mpic_set_irq(struct kvm_kernel_irq_routing_entry *e,
1808                         struct kvm *kvm, int irq_source_id, int level,
1809                         bool line_status)
1810 {
1811         u32 irq = e->irqchip.pin;
1812         struct openpic *opp = kvm->arch.mpic;
1813         unsigned long flags;
1814
1815         spin_lock_irqsave(&opp->lock, flags);
1816         openpic_set_irq(opp, irq, level);
1817         spin_unlock_irqrestore(&opp->lock, flags);
1818
1819         /* All code paths we care about don't check for the return value */
1820         return 0;
1821 }
1822
1823 int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
1824                 struct kvm *kvm, int irq_source_id, int level, bool line_status)
1825 {
1826         struct openpic *opp = kvm->arch.mpic;
1827         unsigned long flags;
1828
1829         spin_lock_irqsave(&opp->lock, flags);
1830
1831         /*
1832          * XXX We ignore the target address for now, as we only support
1833          *     a single MSI bank.
1834          */
1835         openpic_msi_write(kvm->arch.mpic, MSIIR_OFFSET, e->msi.data);
1836         spin_unlock_irqrestore(&opp->lock, flags);
1837
1838         /* All code paths we care about don't check for the return value */
1839         return 0;
1840 }
1841
1842 int kvm_set_routing_entry(struct kvm_irq_routing_table *rt,
1843                           struct kvm_kernel_irq_routing_entry *e,
1844                           const struct kvm_irq_routing_entry *ue)
1845 {
1846         int r = -EINVAL;
1847
1848         switch (ue->type) {
1849         case KVM_IRQ_ROUTING_IRQCHIP:
1850                 e->set = mpic_set_irq;
1851                 e->irqchip.irqchip = ue->u.irqchip.irqchip;
1852                 e->irqchip.pin = ue->u.irqchip.pin;
1853                 if (e->irqchip.pin >= KVM_IRQCHIP_NUM_PINS)
1854                         goto out;
1855                 rt->chip[ue->u.irqchip.irqchip][e->irqchip.pin] = ue->gsi;
1856                 break;
1857         case KVM_IRQ_ROUTING_MSI:
1858                 e->set = kvm_set_msi;
1859                 e->msi.address_lo = ue->u.msi.address_lo;
1860                 e->msi.address_hi = ue->u.msi.address_hi;
1861                 e->msi.data = ue->u.msi.data;
1862                 break;
1863         default:
1864                 goto out;
1865         }
1866
1867         r = 0;
1868 out:
1869         return r;
1870 }