James Morse | f5df269 | 2018-01-08 15:38:12 +0000 | [diff] [blame] | 1 | // SPDX-License-Identifier: GPL-2.0 |
| 2 | // Copyright (C) 2017 Arm Ltd. |
| 3 | #define pr_fmt(fmt) "sdei: " fmt |
| 4 | |
| 5 | #include <linux/arm_sdei.h> |
| 6 | #include <linux/hardirq.h> |
| 7 | #include <linux/irqflags.h> |
| 8 | #include <linux/sched/task_stack.h> |
| 9 | #include <linux/uaccess.h> |
| 10 | |
| 11 | #include <asm/alternative.h> |
| 12 | #include <asm/kprobes.h> |
James Morse | 79e9aa5 | 2018-01-08 15:38:18 +0000 | [diff] [blame] | 13 | #include <asm/mmu.h> |
James Morse | f5df269 | 2018-01-08 15:38:12 +0000 | [diff] [blame] | 14 | #include <asm/ptrace.h> |
James Morse | 79e9aa5 | 2018-01-08 15:38:18 +0000 | [diff] [blame] | 15 | #include <asm/sections.h> |
Laura Abbott | 8a1ccfb | 2018-07-20 14:41:53 -0700 | [diff] [blame] | 16 | #include <asm/stacktrace.h> |
James Morse | f5df269 | 2018-01-08 15:38:12 +0000 | [diff] [blame] | 17 | #include <asm/sysreg.h> |
| 18 | #include <asm/vmap_stack.h> |
| 19 | |
/*
 * How the SDEI handler exits back to firmware. Assigned by
 * sdei_arch_get_entry_point(): SDEI_EXIT_HVC when the conduit is CONDUIT_HVC,
 * SDEI_EXIT_SMC otherwise.
 */
| 20 | unsigned long sdei_exit_mode; |
| 21 | |
| 22 | /* |
| 23 | * VMAP'd stacks checking for stack overflow on exception using sp as a scratch |
| 24 | * register, meaning SDEI has to switch to its own stack. We need two stacks as |
| 25 | * a critical event may interrupt a normal event that has just taken a |
| 26 | * synchronous exception, and is using sp as scratch register. For a critical |
| 27 | * event interrupting a normal event, we can't reliably tell if we were on the |
| 28 | * sdei stack. |
| 29 | * For now, we allocate stacks when the driver is probed. |
| 30 | */ |
/* Per-cpu base pointers of the normal-event and critical-event SDEI stacks. */
| 31 | DECLARE_PER_CPU(unsigned long *, sdei_stack_normal_ptr); |
| 32 | DECLARE_PER_CPU(unsigned long *, sdei_stack_critical_ptr); |
| 33 | |
/* The storage behind the declarations above is only defined with VMAP'd stacks. */
| 34 | #ifdef CONFIG_VMAP_STACK |
| 35 | DEFINE_PER_CPU(unsigned long *, sdei_stack_normal_ptr); |
| 36 | DEFINE_PER_CPU(unsigned long *, sdei_stack_critical_ptr); |
| 37 | #endif |
| 38 | |
| 39 | static void _free_sdei_stack(unsigned long * __percpu *ptr, int cpu) |
| 40 | { |
| 41 | unsigned long *p; |
| 42 | |
| 43 | p = per_cpu(*ptr, cpu); |
| 44 | if (p) { |
| 45 | per_cpu(*ptr, cpu) = NULL; |
| 46 | vfree(p); |
| 47 | } |
| 48 | } |
| 49 | |
| 50 | static void free_sdei_stacks(void) |
| 51 | { |
| 52 | int cpu; |
| 53 | |
| 54 | for_each_possible_cpu(cpu) { |
| 55 | _free_sdei_stack(&sdei_stack_normal_ptr, cpu); |
| 56 | _free_sdei_stack(&sdei_stack_critical_ptr, cpu); |
| 57 | } |
| 58 | } |
| 59 | |
| 60 | static int _init_sdei_stack(unsigned long * __percpu *ptr, int cpu) |
| 61 | { |
| 62 | unsigned long *p; |
| 63 | |
| 64 | p = arch_alloc_vmap_stack(SDEI_STACK_SIZE, cpu_to_node(cpu)); |
| 65 | if (!p) |
| 66 | return -ENOMEM; |
| 67 | per_cpu(*ptr, cpu) = p; |
| 68 | |
| 69 | return 0; |
| 70 | } |
| 71 | |
| 72 | static int init_sdei_stacks(void) |
| 73 | { |
| 74 | int cpu; |
| 75 | int err = 0; |
| 76 | |
| 77 | for_each_possible_cpu(cpu) { |
| 78 | err = _init_sdei_stack(&sdei_stack_normal_ptr, cpu); |
| 79 | if (err) |
| 80 | break; |
| 81 | err = _init_sdei_stack(&sdei_stack_critical_ptr, cpu); |
| 82 | if (err) |
| 83 | break; |
| 84 | } |
| 85 | |
| 86 | if (err) |
| 87 | free_sdei_stacks(); |
| 88 | |
| 89 | return err; |
| 90 | } |
| 91 | |
Will Deacon | eab1cec | 2018-07-31 12:02:18 +0100 | [diff] [blame] | 92 | static bool on_sdei_normal_stack(unsigned long sp, struct stack_info *info) |
James Morse | f5df269 | 2018-01-08 15:38:12 +0000 | [diff] [blame] | 93 | { |
Laura Abbott | 8a1ccfb | 2018-07-20 14:41:53 -0700 | [diff] [blame] | 94 | unsigned long low = (unsigned long)raw_cpu_read(sdei_stack_normal_ptr); |
| 95 | unsigned long high = low + SDEI_STACK_SIZE; |
James Morse | f5df269 | 2018-01-08 15:38:12 +0000 | [diff] [blame] | 96 | |
Wei Li | 1c41860 | 2019-04-01 11:55:57 +0800 | [diff] [blame] | 97 | if (!low) |
| 98 | return false; |
| 99 | |
Laura Abbott | 8a1ccfb | 2018-07-20 14:41:53 -0700 | [diff] [blame] | 100 | if (sp < low || sp >= high) |
| 101 | return false; |
| 102 | |
| 103 | if (info) { |
| 104 | info->low = low; |
| 105 | info->high = high; |
| 106 | info->type = STACK_TYPE_SDEI_NORMAL; |
| 107 | } |
| 108 | |
| 109 | return true; |
| 110 | } |
| 111 | |
Will Deacon | eab1cec | 2018-07-31 12:02:18 +0100 | [diff] [blame] | 112 | static bool on_sdei_critical_stack(unsigned long sp, struct stack_info *info) |
Laura Abbott | 8a1ccfb | 2018-07-20 14:41:53 -0700 | [diff] [blame] | 113 | { |
| 114 | unsigned long low = (unsigned long)raw_cpu_read(sdei_stack_critical_ptr); |
| 115 | unsigned long high = low + SDEI_STACK_SIZE; |
| 116 | |
Wei Li | 1c41860 | 2019-04-01 11:55:57 +0800 | [diff] [blame] | 117 | if (!low) |
| 118 | return false; |
| 119 | |
Laura Abbott | 8a1ccfb | 2018-07-20 14:41:53 -0700 | [diff] [blame] | 120 | if (sp < low || sp >= high) |
| 121 | return false; |
| 122 | |
| 123 | if (info) { |
| 124 | info->low = low; |
| 125 | info->high = high; |
| 126 | info->type = STACK_TYPE_SDEI_CRITICAL; |
| 127 | } |
| 128 | |
| 129 | return true; |
| 130 | } |
| 131 | |
Will Deacon | eab1cec | 2018-07-31 12:02:18 +0100 | [diff] [blame] | 132 | bool _on_sdei_stack(unsigned long sp, struct stack_info *info) |
Laura Abbott | 8a1ccfb | 2018-07-20 14:41:53 -0700 | [diff] [blame] | 133 | { |
James Morse | f5df269 | 2018-01-08 15:38:12 +0000 | [diff] [blame] | 134 | if (!IS_ENABLED(CONFIG_VMAP_STACK)) |
| 135 | return false; |
| 136 | |
Laura Abbott | 8a1ccfb | 2018-07-20 14:41:53 -0700 | [diff] [blame] | 137 | if (on_sdei_critical_stack(sp, info)) |
James Morse | f5df269 | 2018-01-08 15:38:12 +0000 | [diff] [blame] | 138 | return true; |
| 139 | |
Laura Abbott | 8a1ccfb | 2018-07-20 14:41:53 -0700 | [diff] [blame] | 140 | if (on_sdei_normal_stack(sp, info)) |
| 141 | return true; |
James Morse | f5df269 | 2018-01-08 15:38:12 +0000 | [diff] [blame] | 142 | |
Laura Abbott | 8a1ccfb | 2018-07-20 14:41:53 -0700 | [diff] [blame] | 143 | return false; |
James Morse | f5df269 | 2018-01-08 15:38:12 +0000 | [diff] [blame] | 144 | } |
| 145 | |
| 146 | unsigned long sdei_arch_get_entry_point(int conduit) |
| 147 | { |
| 148 | /* |
| 149 | * SDEI works between adjacent exception levels. If we booted at EL1 we |
| 150 | * assume a hypervisor is marshalling events. If we booted at EL2 and |
| 151 | * dropped to EL1 because we don't support VHE, then we can't support |
| 152 | * SDEI. |
| 153 | */ |
| 154 | if (is_hyp_mode_available() && !is_kernel_in_hyp_mode()) { |
| 155 | pr_err("Not supported on this hardware/boot configuration\n"); |
| 156 | return 0; |
| 157 | } |
| 158 | |
| 159 | if (IS_ENABLED(CONFIG_VMAP_STACK)) { |
| 160 | if (init_sdei_stacks()) |
| 161 | return 0; |
| 162 | } |
| 163 | |
| 164 | sdei_exit_mode = (conduit == CONDUIT_HVC) ? SDEI_EXIT_HVC : SDEI_EXIT_SMC; |
James Morse | 79e9aa5 | 2018-01-08 15:38:18 +0000 | [diff] [blame] | 165 | |
| 166 | #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 |
| 167 | if (arm64_kernel_unmapped_at_el0()) { |
| 168 | unsigned long offset; |
| 169 | |
| 170 | offset = (unsigned long)__sdei_asm_entry_trampoline - |
| 171 | (unsigned long)__entry_tramp_text_start; |
| 172 | return TRAMP_VALIAS + offset; |
| 173 | } else |
| 174 | #endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */ |
| 175 | return (unsigned long)__sdei_asm_handler; |
| 176 | |
James Morse | f5df269 | 2018-01-08 15:38:12 +0000 | [diff] [blame] | 177 | } |
| 178 | |
| 179 | /* |
| 180 | * __sdei_handler() returns one of: |
| 181 | * SDEI_EV_HANDLED - success, return to the interrupted context. |
| 182 | * SDEI_EV_FAILED - failure, return this error code to firmare. |
| 183 | * virtual-address - success, return to this address. |
| 184 | */ |
| 185 | static __kprobes unsigned long _sdei_handler(struct pt_regs *regs, |
| 186 | struct sdei_registered_event *arg) |
| 187 | { |
| 188 | u32 mode; |
| 189 | int i, err = 0; |
James Morse | 79e9aa5 | 2018-01-08 15:38:18 +0000 | [diff] [blame] | 190 | int clobbered_registers = 4; |
James Morse | f5df269 | 2018-01-08 15:38:12 +0000 | [diff] [blame] | 191 | u64 elr = read_sysreg(elr_el1); |
| 192 | u32 kernel_mode = read_sysreg(CurrentEL) | 1; /* +SPSel */ |
| 193 | unsigned long vbar = read_sysreg(vbar_el1); |
| 194 | |
James Morse | 79e9aa5 | 2018-01-08 15:38:18 +0000 | [diff] [blame] | 195 | if (arm64_kernel_unmapped_at_el0()) |
| 196 | clobbered_registers++; |
| 197 | |
James Morse | f5df269 | 2018-01-08 15:38:12 +0000 | [diff] [blame] | 198 | /* Retrieve the missing registers values */ |
| 199 | for (i = 0; i < clobbered_registers; i++) { |
| 200 | /* from within the handler, this call always succeeds */ |
| 201 | sdei_api_event_context(i, ®s->regs[i]); |
| 202 | } |
| 203 | |
| 204 | /* |
| 205 | * We didn't take an exception to get here, set PAN. UAO will be cleared |
| 206 | * by sdei_event_handler()s set_fs(USER_DS) call. |
| 207 | */ |
| 208 | __uaccess_enable_hw_pan(); |
| 209 | |
| 210 | err = sdei_event_handler(regs, arg); |
| 211 | if (err) |
| 212 | return SDEI_EV_FAILED; |
| 213 | |
| 214 | if (elr != read_sysreg(elr_el1)) { |
| 215 | /* |
| 216 | * We took a synchronous exception from the SDEI handler. |
| 217 | * This could deadlock, and if you interrupt KVM it will |
| 218 | * hyp-panic instead. |
| 219 | */ |
| 220 | pr_warn("unsafe: exception during handler\n"); |
| 221 | } |
| 222 | |
| 223 | mode = regs->pstate & (PSR_MODE32_BIT | PSR_MODE_MASK); |
| 224 | |
| 225 | /* |
| 226 | * If we interrupted the kernel with interrupts masked, we always go |
| 227 | * back to wherever we came from. |
| 228 | */ |
| 229 | if (mode == kernel_mode && !interrupts_enabled(regs)) |
| 230 | return SDEI_EV_HANDLED; |
| 231 | |
| 232 | /* |
| 233 | * Otherwise, we pretend this was an IRQ. This lets user space tasks |
| 234 | * receive signals before we return to them, and KVM to invoke it's |
| 235 | * world switch to do the same. |
| 236 | * |
| 237 | * See DDI0487B.a Table D1-7 'Vector offsets from vector table base |
| 238 | * address'. |
| 239 | */ |
| 240 | if (mode == kernel_mode) |
| 241 | return vbar + 0x280; |
| 242 | else if (mode & PSR_MODE32_BIT) |
| 243 | return vbar + 0x680; |
| 244 | |
| 245 | return vbar + 0x480; |
| 246 | } |
| 247 | |
| 248 | |
| 249 | asmlinkage __kprobes notrace unsigned long |
| 250 | __sdei_handler(struct pt_regs *regs, struct sdei_registered_event *arg) |
| 251 | { |
| 252 | unsigned long ret; |
| 253 | bool do_nmi_exit = false; |
| 254 | |
| 255 | /* |
| 256 | * nmi_enter() deals with printk() re-entrance and use of RCU when |
| 257 | * RCU believed this CPU was idle. Because critical events can |
| 258 | * interrupt normal events, we may already be in_nmi(). |
| 259 | */ |
| 260 | if (!in_nmi()) { |
| 261 | nmi_enter(); |
| 262 | do_nmi_exit = true; |
| 263 | } |
| 264 | |
| 265 | ret = _sdei_handler(regs, arg); |
| 266 | |
| 267 | if (do_nmi_exit) |
| 268 | nmi_exit(); |
| 269 | |
| 270 | return ret; |
| 271 | } |