/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * aes-ce-core.S - AES in ECB/CBC/CTR/XTS mode using ARMv8 Crypto Extensions
 *
 * Copyright (C) 2015 Linaro Ltd <ard.biesheuvel@linaro.org>
 */
7
/* ENTRY()/ENDPROC() come from linkage.h; assembler.h is the ARM asm helper set */
#include <linux/linkage.h>
#include <asm/assembler.h>

	.text
	.fpu		crypto-neon-fp-armv8	@ enable aese/aesd/aesmc/aesimc
	.align		3			@ 2^3 = 8 byte alignment
14
/*
 * enc_round: one full AES encryption round on \state with round key \key.
 * aese xors in the key and applies SubBytes/ShiftRows, aesmc applies
 * MixColumns.
 */
	.macro		enc_round, state, key
	aese.8		\state, \key
	aesmc.8		\state, \state
	.endm
19
/*
 * dec_round: one full AES decryption round on \state with round key \key.
 * aesd xors in the key and applies the inverse SubBytes/ShiftRows, aesimc
 * applies the inverse MixColumns.
 */
	.macro		dec_round, state, key
	aesd.8		\state, \key
	aesimc.8	\state, \state
	.endm
24
/*
 * enc_dround: two consecutive encryption rounds on the block in q0,
 * using \key1 and then \key2.
 */
	.macro		enc_dround, key1, key2
	enc_round	q0, \key1
	enc_round	q0, \key2
	.endm
29
/*
 * dec_dround: two consecutive decryption rounds on the block in q0,
 * using \key1 and then \key2.
 */
	.macro		dec_dround, key1, key2
	dec_round	q0, \key1
	dec_round	q0, \key2
	.endm
34
/*
 * enc_fround: tail of the encryption of q0. One full round with \key1,
 * a last round without MixColumns with \key2, then the final key xor
 * with \key3.
 */
	.macro		enc_fround, key1, key2, key3
	enc_round	q0, \key1
	aese.8		q0, \key2
	veor		q0, q0, \key3
	.endm
40
/*
 * dec_fround: tail of the decryption of q0. One full round with \key1,
 * a last round without inverse MixColumns with \key2, then the final key
 * xor with \key3.
 */
	.macro		dec_fround, key1, key2, key3
	dec_round	q0, \key1
	aesd.8		q0, \key2
	veor		q0, q0, \key3
	.endm
46
/*
 * enc_dround_3x: two encryption rounds (\key1, then \key2) applied to the
 * three blocks in q0, q1 and q2, interleaved per round.
 */
	.macro		enc_dround_3x, key1, key2
	enc_round	q0, \key1
	enc_round	q1, \key1
	enc_round	q2, \key1
	enc_round	q0, \key2
	enc_round	q1, \key2
	enc_round	q2, \key2
	.endm
55
/*
 * dec_dround_3x: two decryption rounds (\key1, then \key2) applied to the
 * three blocks in q0, q1 and q2, interleaved per round.
 */
	.macro		dec_dround_3x, key1, key2
	dec_round	q0, \key1
	dec_round	q1, \key1
	dec_round	q2, \key1
	dec_round	q0, \key2
	dec_round	q1, \key2
	dec_round	q2, \key2
	.endm
64
/*
 * enc_fround_3x: tail of the encryption of the three blocks in q0-q2.
 * Full round with \key1, last round without MixColumns with \key2, then
 * the final key xor with \key3.
 */
	.macro		enc_fround_3x, key1, key2, key3
	enc_round	q0, \key1
	enc_round	q1, \key1
	enc_round	q2, \key1
	aese.8		q0, \key2
	aese.8		q1, \key2
	aese.8		q2, \key2
	veor		q0, q0, \key3
	veor		q1, q1, \key3
	veor		q2, q2, \key3
	.endm
76
/*
 * dec_fround_3x: tail of the decryption of the three blocks in q0-q2.
 * Full round with \key1, last round without inverse MixColumns with
 * \key2, then the final key xor with \key3.
 */
	.macro		dec_fround_3x, key1, key2, key3
	dec_round	q0, \key1
	dec_round	q1, \key1
	dec_round	q2, \key1
	aesd.8		q0, \key2
	aesd.8		q1, \key2
	aesd.8		q2, \key2
	veor		q0, q0, \key3
	veor		q1, q1, \key3
	veor		q2, q2, \key3
	.endm
88
/*
 * do_block: complete body of an AES block transform; both exits return
 * with bx lr.
 *   \dround : macro applying two middle rounds, given two round keys
 *   \fround : macro applying the last two rounds and the final key xor
 * On entry q8/q9 hold round keys 0/1, q14 the last round key, ip points
 * at round key 2 and r3 holds the number of rounds. The remaining round
 * keys are streamed through q10-q13, one pair being loaded while the
 * previous pair is consumed. The flags set by the initial cmp are only
 * tested by the later blo/beq, so nothing in between may alter them.
 */
	.macro		do_block, dround, fround
	cmp		r3, #12			@ <12: AES-128, 12: AES-192, >12: AES-256
	vld1.8		{q10-q11}, [ip]!
	\dround		q8, q9
	vld1.8		{q12-q13}, [ip]!
	\dround		q10, q11
	vld1.8		{q10-q11}, [ip]!
	\dround		q12, q13
	vld1.8		{q12-q13}, [ip]!
	\dround		q10, q11
	blo		0f			@ 10 rounds: finish with q12/q13
	vld1.8		{q10-q11}, [ip]!
	\dround		q12, q13
	beq		1f			@ 12 rounds: finish with q10/q11
	vld1.8		{q12-q13}, [ip]		@ 14 rounds: one more key pair
	\dround		q10, q11
0:	\fround		q12, q13, q14
	bx		lr

1:	\fround		q10, q11, q14
	bx		lr
	.endm
111
/*
 * Internal, non-AAPCS compliant functions that implement the core AES
 * transforms. They preserve all core registers except ip, and overwrite
 * q0 - q2 (in/output blocks), q10 - q13 (round key scratch) and the
 * condition flags.
 * Arguments:
 *   q0        : first in/output block
 *   q1        : second in/output block (_3x version only)
 *   q2        : third in/output block (_3x version only)
 *   q8        : first round key
 *   q9        : second round key
 *   q14       : final round key
 *   r2        : address of round key array
 *   r3        : number of rounds
 */
/*
 * aes_encrypt: encrypt the block in q0 in place. Expects q8, q9 and q14
 * preloaded with the first two and the last round key, r2 = round key
 * array, r3 = number of rounds.
 * .Laes_encrypt_tweak is a second entry point for a caller that has set
 * ip itself (ce_aes_xts_init does so to run with another key schedule).
 */
	.align		6			@ 64 byte alignment
aes_encrypt:
	add		ip, r2, #32		@ ip = &rk[2], the 3rd round key
.Laes_encrypt_tweak:
	do_block	enc_dround, enc_fround
ENDPROC(aes_encrypt)
131
/*
 * aes_decrypt: decrypt the block in q0 in place. Expects q8, q9 and q14
 * preloaded with the first two and the last round key, r2 = round key
 * array, r3 = number of rounds.
 */
	.align		6			@ 64 byte alignment
aes_decrypt:
	add		ip, r2, #32		@ ip = &rk[2], the 3rd round key
	do_block	dec_dround, dec_fround
ENDPROC(aes_decrypt)
137
/*
 * aes_encrypt_3x: encrypt the three blocks in q0, q1 and q2 in place.
 * Same register contract as aes_encrypt.
 */
	.align		6			@ 64 byte alignment
aes_encrypt_3x:
	add		ip, r2, #32		@ ip = &rk[2], the 3rd round key
	do_block	enc_dround_3x, enc_fround_3x
ENDPROC(aes_encrypt_3x)
143
/*
 * aes_decrypt_3x: decrypt the three blocks in q0, q1 and q2 in place.
 * Same register contract as aes_decrypt.
 */
	.align		6			@ 64 byte alignment
aes_decrypt_3x:
	add		ip, r2, #32		@ ip = &rk[2], the 3rd round key
	do_block	dec_dround_3x, dec_fround_3x
ENDPROC(aes_decrypt_3x)
149
/*
 * prepare_key: load the round keys that the core transforms expect to
 * find in registers.
 *   \rk     : register holding the address of the round key array
 *   \rounds : register holding the number of rounds
 * Sets q8/q9 to the first two round keys and q14 to the last one
 * (at \rk + 16 * \rounds). Overwrites ip.
 */
	.macro		prepare_key, rk, rounds
	add		ip, \rk, \rounds, lsl #4	@ ip = &rk[rounds]
	vld1.8		{q8-q9}, [\rk]		@ first two round keys
	vld1.8		{q14}, [ip]		@ last round key
	.endm
155
/*
 * ce_aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
 *		      int blocks)
 * ce_aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
 *		      int blocks)
 */
/*
 * ECB encryption. r0 = out, r1 = in, r2 = round keys, r3 = rounds; the
 * block count is read from the stack into r4. Blocks are handled three
 * at a time while at least three remain, then one at a time.
 */
ENTRY(ce_aes_ecb_encrypt)
	push		{r4, lr}
	ldr		r4, [sp, #8]		@ r4 = blocks (above the 2 pushed regs)
	prepare_key	r2, r3
.Lecbencloop3x:
	subs		r4, r4, #3
	bmi		.Lecbenc1x		@ fewer than 3 blocks left
	vld1.8		{q0-q1}, [r1]!
	vld1.8		{q2}, [r1]!
	bl		aes_encrypt_3x
	vst1.8		{q0-q1}, [r0]!
	vst1.8		{q2}, [r0]!
	b		.Lecbencloop3x
.Lecbenc1x:
	adds		r4, r4, #3		@ undo the failed subtraction
	beq		.Lecbencout		@ nothing left
.Lecbencloop:
	vld1.8		{q0}, [r1]!
	bl		aes_encrypt
	vst1.8		{q0}, [r0]!
	subs		r4, r4, #1
	bne		.Lecbencloop
.Lecbencout:
	pop		{r4, pc}
ENDPROC(ce_aes_ecb_encrypt)
187
/*
 * ECB decryption. r0 = out, r1 = in, r2 = round keys, r3 = rounds; the
 * block count is read from the stack into r4. Blocks are handled three
 * at a time while at least three remain, then one at a time.
 */
ENTRY(ce_aes_ecb_decrypt)
	push		{r4, lr}
	ldr		r4, [sp, #8]		@ r4 = blocks (above the 2 pushed regs)
	prepare_key	r2, r3
.Lecbdecloop3x:
	subs		r4, r4, #3
	bmi		.Lecbdec1x		@ fewer than 3 blocks left
	vld1.8		{q0-q1}, [r1]!
	vld1.8		{q2}, [r1]!
	bl		aes_decrypt_3x
	vst1.8		{q0-q1}, [r0]!
	vst1.8		{q2}, [r0]!
	b		.Lecbdecloop3x
.Lecbdec1x:
	adds		r4, r4, #3		@ undo the failed subtraction
	beq		.Lecbdecout		@ nothing left
.Lecbdecloop:
	vld1.8		{q0}, [r1]!
	bl		aes_decrypt
	vst1.8		{q0}, [r0]!
	subs		r4, r4, #1
	bne		.Lecbdecloop
.Lecbdecout:
	pop		{r4, pc}
ENDPROC(ce_aes_ecb_decrypt)
213
/*
 * ce_aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
 *		      int blocks, u8 iv[])
 * ce_aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
 *		      int blocks, u8 iv[])
 */
/*
 * CBC encryption. r0 = out, r1 = in, r2 = round keys, r3 = rounds; the
 * block count and iv pointer are read from the stack into r4 and r5.
 * q0 carries the chaining value: the iv first, then each ciphertext
 * block. The last chaining value is written back to iv[] on exit.
 * A block count of zero is a no-op (iv[] is rewritten unchanged); without
 * the guard the count would wrap around in the loop below.
 */
ENTRY(ce_aes_cbc_encrypt)
	push		{r4-r6, lr}
	ldrd		r4, r5, [sp, #16]	@ r4 = blocks, r5 = iv
	vld1.8		{q0}, [r5]		@ q0 = iv
	prepare_key	r2, r3
	teq		r4, #0			@ nothing to encrypt?
	beq		.Lcbcencout
.Lcbcencloop:
	vld1.8		{q1}, [r1]!		@ get next pt block
	veor		q0, q0, q1		@ ..and xor with chaining value
	bl		aes_encrypt
	vst1.8		{q0}, [r0]!
	subs		r4, r4, #1
	bne		.Lcbcencloop
.Lcbcencout:
	vst1.8		{q0}, [r5]		@ return next iv
	pop		{r4-r6, pc}
ENDPROC(ce_aes_cbc_encrypt)
235
/*
 * CBC decryption. r0 = out, r1 = in, r2 = round keys, r3 = rounds; the
 * block count and iv pointer are read from the stack into r4 and r5.
 * q6 always holds the previous ciphertext block (initially the iv) and
 * is written back to iv[] on exit. Blocks are handled three at a time
 * while at least three remain, then one at a time.
 */
ENTRY(ce_aes_cbc_decrypt)
	push		{r4-r6, lr}
	ldrd		r4, r5, [sp, #16]	@ r4 = blocks, r5 = iv
	vld1.8		{q6}, [r5]		@ q6 = iv
	prepare_key	r2, r3
.Lcbcdecloop3x:
	subs		r4, r4, #3
	bmi		.Lcbcdec1x		@ fewer than 3 blocks left
	vld1.8		{q0-q1}, [r1]!
	vld1.8		{q2}, [r1]!
	vmov		q3, q0			@ keep the ct blocks: they are the
	vmov		q4, q1			@ xor masks for the following blocks
	vmov		q5, q2
	bl		aes_decrypt_3x
	veor		q0, q0, q6		@ pt0 ^= previous ct
	veor		q1, q1, q3		@ pt1 ^= ct0
	veor		q2, q2, q4		@ pt2 ^= ct1
	vmov		q6, q5			@ ct2 chains into the next block
	vst1.8		{q0-q1}, [r0]!
	vst1.8		{q2}, [r0]!
	b		.Lcbcdecloop3x
.Lcbcdec1x:
	adds		r4, r4, #3		@ undo the failed subtraction
	beq		.Lcbcdecout		@ nothing left
	vmov		q15, q14		@ preserve last round key
.Lcbcdecloop:
	vld1.8		{q0}, [r1]!		@ get next ct block
	veor		q14, q15, q6		@ fold prev ct into the last key so
						@ the final key xor also unchains
	vmov		q6, q0
	bl		aes_decrypt
	vst1.8		{q0}, [r0]!
	subs		r4, r4, #1
	bne		.Lcbcdecloop
.Lcbcdecout:
	vst1.8		{q6}, [r5]		@ return next iv
	pop		{r4-r6, pc}
ENDPROC(ce_aes_cbc_decrypt)
273
/*
 * ce_aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
 *		      int blocks, u8 ctr[])
 *
 * r0 = out, r1 = in, r2 = round keys, r3 = rounds; the block count and
 * counter pointer are read from the stack into r4 and r5. q6 holds the
 * big-endian counter block and r6 a byte-swapped copy of its last 32-bit
 * word. The 3-blocks-at-a-time path only increments that last word, so it
 * is used only when adding the block count to it cannot overflow;
 * otherwise every block goes through the single block path, which
 * propagates carries into the upper words. When the count drops below
 * zero in the single block path, the key stream block is stored to out
 * as is and no input is read. The next counter value is written back to
 * ctr[] on exit.
 */
ENTRY(ce_aes_ctr_encrypt)
	push		{r4-r6, lr}
	ldrd		r4, r5, [sp, #16]	@ r4 = blocks, r5 = ctr
	vld1.8		{q6}, [r5]		@ load ctr
	prepare_key	r2, r3
	vmov		r6, s27			@ keep swabbed ctr in r6
	rev		r6, r6
	cmn		r6, r4			@ 32 bit overflow?
	bcs		.Lctrloop		@ yes: take the carry aware path
.Lctrloop3x:
	subs		r4, r4, #3
	bmi		.Lctr1x			@ fewer than 3 blocks left
	add		r6, r6, #1
	vmov		q0, q6			@ q0 = ctr
	vmov		q1, q6
	rev		ip, r6
	add		r6, r6, #1
	vmov		q2, q6
	vmov		s7, ip			@ q1 = ctr + 1
	rev		ip, r6
	add		r6, r6, #1
	vmov		s11, ip			@ q2 = ctr + 2
	vld1.8		{q3-q4}, [r1]!
	vld1.8		{q5}, [r1]!
	bl		aes_encrypt_3x
	veor		q0, q0, q3
	veor		q1, q1, q4
	veor		q2, q2, q5
	rev		ip, r6
	vst1.8		{q0-q1}, [r0]!
	vst1.8		{q2}, [r0]!
	vmov		s27, ip			@ q6 = ctr + 3
	b		.Lctrloop3x
.Lctr1x:
	adds		r4, r4, #3		@ undo the failed subtraction
	beq		.Lctrout		@ nothing left
.Lctrloop:
	vmov		q0, q6
	bl		aes_encrypt

	adds		r6, r6, #1		@ increment BE ctr
	rev		ip, r6
	vmov		s27, ip
	bcs		.Lctrcarry		@ carry out of the adds above

.Lctrcarrydone:
	subs		r4, r4, #1
	bmi		.Lctrtailblock		@ blocks < 0 means tail block
	vld1.8		{q3}, [r1]!
	veor		q3, q0, q3
	vst1.8		{q3}, [r0]!
	bne		.Lctrloop		@ flags still those of the subs

.Lctrout:
	vst1.8		{q6}, [r5]		@ return next CTR value
	pop		{r4-r6, pc}

.Lctrtailblock:
	vst1.8		{q0}, [r0, :64]		@ return the key stream
	b		.Lctrout

.Lctrcarry:
	.irp		sreg, s26, s25, s24
	vmov		ip, \sreg		@ load next word of ctr
	rev		ip, ip			@ ... to handle the carry
	adds		ip, ip, #1
	rev		ip, ip
	vmov		\sreg, ip
	bcc		.Lctrcarrydone		@ stop once a word does not wrap
	.endr
	b		.Lctrcarrydone
ENDPROC(ce_aes_ctr_encrypt)
350
/*
 * ce_aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
 *		      int blocks, u8 iv[], u8 const rk2[], int first)
 * ce_aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
 *		      int blocks, u8 iv[], u8 const rk2[], int first)
 */
357
/*
 * next_tweak: \out = \in multiplied by x in GF(2^128), i.e. the next XTS
 * tweak.
 *   \const : q register holding { 1, 0x87 } as its low and high 64 bits
 *   \tmp   : scratch q register, must differ from \in and \out
 * \out may be the same register as \in.
 */
	.macro		next_tweak, out, in, const, tmp
	vshr.s64	\tmp, \in, #63		@ all ones where a half has its top bit set
	vand		\tmp, \tmp, \const	@ low: 1, high: 0x87 where it does
	vadd.u64	\out, \in, \in		@ shift both halves left by one
	vext.8		\tmp, \tmp, \tmp, #8	@ swap halves: carries cross over
	veor		\out, \out, \tmp
	.endm
365
/* Constant pair consumed by next_tweak; loaded into d14/d15 (q7) */
	.align		3
.Lxts_mul_x:
	.quad		1, 0x87
369
/*
 * ce_aes_xts_init: prologue shared by ce_aes_xts_encrypt/decrypt. It is
 * called with bl right after their push {r4-r6, lr}, so the stacked
 * arguments are found 16 bytes above sp.
 * Returns with q7 = next_tweak constant, r4 = blocks, r5 = iv pointer
 * and q0 = the iv. If 'first' equals 1, q0 is additionally encrypted
 * with key 2; in that case r6 holds the key 2 address on return and
 * q8, q9 and q14 hold key 2 round keys. Otherwise r6 holds 'first'.
 */
ce_aes_xts_init:
	vldr		d14, .Lxts_mul_x
	vldr		d15, .Lxts_mul_x + 8

	ldrd		r4, r5, [sp, #16]	@ r4 = blocks, r5 = iv
	ldr		r6, [sp, #28]		@ r6 = first
	vld1.8		{q0}, [r5]		@ load iv
	teq		r6, #1			@ start of a block?
	bxne		lr			@ no: iv is used as loaded

	@ Encrypt the IV in q0 with the second AES key. This should only
	@ be done at the start of a block.
	ldr		r6, [sp, #24]		@ load AES key 2
	prepare_key	r6, r3
	add		ip, r6, #32		@ 3rd round key of key 2
	b		.Laes_encrypt_tweak	@ tail call
ENDPROC(ce_aes_xts_init)
387
/*
 * XTS encryption. r0 = out, r1 = in, r2 = key 1 round keys, r3 = rounds;
 * blocks, iv, key 2 and 'first' are picked up from the stack by
 * ce_aes_xts_init. q3 holds the tweak of the current block and q7 the
 * next_tweak constant. The tweak of the last processed block is written
 * back to iv[] on exit, which is why a call that does not start a new
 * block advances the tweak before using it.
 */
ENTRY(ce_aes_xts_encrypt)
	push		{r4-r6, lr}

	bl		ce_aes_xts_init		@ run shared prologue
	prepare_key	r2, r3			@ (re)load key 1 round keys
	vmov		q3, q0			@ q3 = initial tweak

	teq		r6, #0			@ start of a block?
	bne		.Lxtsenc3x		@ yes: tweak is ready for use

.Lxtsencloop3x:
	next_tweak	q3, q3, q7, q6
.Lxtsenc3x:
	subs		r4, r4, #3
	bmi		.Lxtsenc1x		@ fewer than 3 blocks left
	vld1.8		{q0-q1}, [r1]!		@ get 3 pt blocks
	vld1.8		{q2}, [r1]!
	next_tweak	q4, q3, q7, q6
	veor		q0, q0, q3
	next_tweak	q5, q4, q7, q6
	veor		q1, q1, q4
	veor		q2, q2, q5
	bl		aes_encrypt_3x
	veor		q0, q0, q3
	veor		q1, q1, q4
	veor		q2, q2, q5
	vst1.8		{q0-q1}, [r0]!		@ write 3 ct blocks
	vst1.8		{q2}, [r0]!
	vmov		q3, q5			@ last tweak used so far
	teq		r4, #0
	beq		.Lxtsencout
	b		.Lxtsencloop3x
.Lxtsenc1x:
	adds		r4, r4, #3		@ undo the failed subtraction
	beq		.Lxtsencout		@ nothing left
.Lxtsencloop:
	vld1.8		{q0}, [r1]!
	veor		q0, q0, q3
	bl		aes_encrypt
	veor		q0, q0, q3
	vst1.8		{q0}, [r0]!
	subs		r4, r4, #1
	beq		.Lxtsencout
	next_tweak	q3, q3, q7, q6
	b		.Lxtsencloop
.Lxtsencout:
	vst1.8		{q3}, [r5]		@ return last tweak used
	pop		{r4-r6, pc}
ENDPROC(ce_aes_xts_encrypt)
437
438
/*
 * XTS decryption. r0 = out, r1 = in, r2 = key 1 round keys, r3 = rounds;
 * blocks, iv, key 2 and 'first' are picked up from the stack by
 * ce_aes_xts_init. q3 holds the tweak of the current block and q7 the
 * next_tweak constant. The tweak of the last processed block is written
 * back to iv[] on exit, which is why a call that does not start a new
 * block advances the tweak before using it.
 */
ENTRY(ce_aes_xts_decrypt)
	push		{r4-r6, lr}

	bl		ce_aes_xts_init		@ run shared prologue
	prepare_key	r2, r3			@ (re)load key 1 round keys
	vmov		q3, q0			@ q3 = initial tweak

	teq		r6, #0			@ start of a block?
	bne		.Lxtsdec3x		@ yes: tweak is ready for use

.Lxtsdecloop3x:
	next_tweak	q3, q3, q7, q6
.Lxtsdec3x:
	subs		r4, r4, #3
	bmi		.Lxtsdec1x		@ fewer than 3 blocks left
	vld1.8		{q0-q1}, [r1]!		@ get 3 ct blocks
	vld1.8		{q2}, [r1]!
	next_tweak	q4, q3, q7, q6
	veor		q0, q0, q3
	next_tweak	q5, q4, q7, q6
	veor		q1, q1, q4
	veor		q2, q2, q5
	bl		aes_decrypt_3x
	veor		q0, q0, q3
	veor		q1, q1, q4
	veor		q2, q2, q5
	vst1.8		{q0-q1}, [r0]!		@ write 3 pt blocks
	vst1.8		{q2}, [r0]!
	vmov		q3, q5			@ last tweak used so far
	teq		r4, #0
	beq		.Lxtsdecout
	b		.Lxtsdecloop3x
.Lxtsdec1x:
	adds		r4, r4, #3		@ undo the failed subtraction
	beq		.Lxtsdecout		@ nothing left
.Lxtsdecloop:
	vld1.8		{q0}, [r1]!
	veor		q0, q0, q3
	bl		aes_decrypt		@ sets up ip itself on entry
	veor		q0, q0, q3
	vst1.8		{q0}, [r0]!
	subs		r4, r4, #1
	beq		.Lxtsdecout
	next_tweak	q3, q3, q7, q6
	b		.Lxtsdecloop
.Lxtsdecout:
	vst1.8		{q3}, [r5]		@ return last tweak used
	pop		{r4-r6, pc}
ENDPROC(ce_aes_xts_decrypt)
489
/*
 * u32 ce_aes_sub(u32 input) - use the aese instruction to perform the
 *			       AES sbox substitution on each byte in
 *			       'input'
 *
 * The input word is replicated into all four words of q1 and used as the
 * key operand against an all zero state, and the result is read back
 * from the first word. Overwrites q0 and q1.
 */
ENTRY(ce_aes_sub)
	vdup.32		q1, r0			@ input in every 32-bit lane
	veor		q0, q0, q0		@ zero state
	aese.8		q0, q1
	vmov		r0, s0			@ return the first word
	bx		lr
ENDPROC(ce_aes_sub)
502
/*
 * void ce_aes_invert(u8 *dst, u8 *src) - perform the Inverse MixColumns
 *					  operation on round key *src
 *
 * r0 = dst, r1 = src. One 16 byte round key is read from src, transformed
 * and written to dst. Overwrites q0.
 */
ENTRY(ce_aes_invert)
	vld1.8		{q0}, [r1]
	aesimc.8	q0, q0
	vst1.8		{q0}, [r0]
	bx		lr
ENDPROC(ce_aes_invert)