blob: bc53bcaa772e7fcac4d14cc08b6bed44672dcb2b [file] [log] [blame]
/*
 * aes-ce-core.S - AES in CBC/CTR/XTS mode using ARMv8 Crypto Extensions
 *
 * Copyright (C) 2015 Linaro Ltd <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
10
11#include <linux/linkage.h>
12#include <asm/assembler.h>
13
/*
 * Everything below goes into the code section. The .fpu setting makes the
 * assembler accept the aese/aesd/aesmc/aesimc instructions used by the
 * macros that follow. Start on an 8-byte boundary.
 */
	.text
	.fpu		crypto-neon-fp-armv8
	.p2align	3
17
/*
 * enc_round - one full AES encryption round on a single block.
 *   state: q register holding the block, updated in place
 *   key:   q register holding the round key for this round
 */
	.macro		enc_round, state, key
	aese.8		\state, \key		@ AddRoundKey, SubBytes, ShiftRows
	aesmc.8		\state, \state		@ MixColumns
	.endm
22
/*
 * dec_round - one full AES decryption round on a single block.
 *   state: q register holding the block, updated in place
 *   key:   q register holding the round key for this round
 */
	.macro		dec_round, state, key
	aesd.8		\state, \key		@ AddRoundKey, InvSubBytes, InvShiftRows
	aesimc.8	\state, \state		@ InvMixColumns
	.endm
27
/*
 * enc_dround - two consecutive full encryption rounds on the block in q0.
 *   key1: q register with the round key for the first of the two rounds
 *   key2: q register with the round key for the second of the two rounds
 * Each aese is immediately followed by the aesmc on the same register.
 */
	.macro		enc_dround, key1, key2
	aese.8		q0, \key1
	aesmc.8		q0, q0
	aese.8		q0, \key2
	aesmc.8		q0, q0
	.endm
32
/*
 * dec_dround - two consecutive full decryption rounds on the block in q0.
 *   key1: q register with the round key for the first of the two rounds
 *   key2: q register with the round key for the second of the two rounds
 * Each aesd is immediately followed by the aesimc on the same register.
 */
	.macro		dec_dround, key1, key2
	aesd.8		q0, \key1
	aesimc.8	q0, q0
	aesd.8		q0, \key2
	aesimc.8	q0, q0
	.endm
37
/*
 * enc_fround - the closing rounds of an encryption of the block in q0.
 *   key1: round key for the last full round (with MixColumns)
 *   key2: round key for the final round, which has no MixColumns step
 *   key3: round key xored into the result at the very end
 */
	.macro		enc_fround, key1, key2, key3
	aese.8		q0, \key1
	aesmc.8		q0, q0
	aese.8		q0, \key2		@ final round: no aesmc afterwards
	veor		q0, q0, \key3		@ closing AddRoundKey
	.endm
43
/*
 * dec_fround - the closing rounds of a decryption of the block in q0.
 *   key1: round key for the last full round (with InvMixColumns)
 *   key2: round key for the final round, which has no InvMixColumns step
 *   key3: round key xored into the result at the very end
 */
	.macro		dec_fround, key1, key2, key3
	aesd.8		q0, \key1
	aesimc.8	q0, q0
	aesd.8		q0, \key2		@ final round: no aesimc afterwards
	veor		q0, q0, \key3		@ closing AddRoundKey
	.endm
49
/*
 * enc_dround_3x - two full encryption rounds on the three blocks in q0-q2.
 *   key1: round key for the first of the two rounds
 *   key2: round key for the second of the two rounds
 * The three blocks are independent, so each round is issued for q0, q1 and
 * q2 in turn before the next round starts.
 */
	.macro		enc_dround_3x, key1, key2
	@ first round of the pair
	enc_round	q0, \key1
	enc_round	q1, \key1
	enc_round	q2, \key1
	@ second round of the pair
	enc_round	q0, \key2
	enc_round	q1, \key2
	enc_round	q2, \key2
	.endm
58
/*
 * dec_dround_3x - two full decryption rounds on the three blocks in q0-q2.
 *   key1: round key for the first of the two rounds
 *   key2: round key for the second of the two rounds
 * The three blocks are independent, so each round is issued for q0, q1 and
 * q2 in turn before the next round starts.
 */
	.macro		dec_dround_3x, key1, key2
	@ first round of the pair
	dec_round	q0, \key1
	dec_round	q1, \key1
	dec_round	q2, \key1
	@ second round of the pair
	dec_round	q0, \key2
	dec_round	q1, \key2
	dec_round	q2, \key2
	.endm
67
/*
 * enc_fround_3x - the closing rounds of an encryption of the three blocks
 * in q0-q2.
 *   key1: round key for the last full round (with MixColumns)
 *   key2: round key for the final round, which has no MixColumns step
 *   key3: round key xored into each result at the very end
 */
	.macro		enc_fround_3x, key1, key2, key3
	@ last full round
	enc_round	q0, \key1
	enc_round	q1, \key1
	enc_round	q2, \key1
	@ final round, no aesmc
	aese.8		q0, \key2
	aese.8		q1, \key2
	aese.8		q2, \key2
	@ closing AddRoundKey
	veor		q0, q0, \key3
	veor		q1, q1, \key3
	veor		q2, q2, \key3
	.endm
79
/*
 * dec_fround_3x - the closing rounds of a decryption of the three blocks
 * in q0-q2.
 *   key1: round key for the last full round (with InvMixColumns)
 *   key2: round key for the final round, which has no InvMixColumns step
 *   key3: round key xored into each result at the very end
 */
	.macro		dec_fround_3x, key1, key2, key3
	@ last full round
	dec_round	q0, \key1
	dec_round	q1, \key1
	dec_round	q2, \key1
	@ final round, no aesimc
	aesd.8		q0, \key2
	aesd.8		q1, \key2
	aesd.8		q2, \key2
	@ closing AddRoundKey
	veor		q0, q0, \key3
	veor		q1, q1, \key3
	veor		q2, q2, \key3
	.endm
91
/*
 * do_block - body shared by the internal aes_{en,de}crypt[_3x] routines.
 *   dround: name of the macro that applies two full rounds
 *   fround: name of the macro that applies the closing rounds
 *
 * Expects r3 = number of rounds, ip = address of the round key at index 2,
 * q8/q9 = round keys 0 and 1, q14 = final round key. The keys in between
 * are streamed through q10-q13 one pair at a time, each load being issued
 * ahead of the rounds that consume the previously loaded pair.
 *
 * The flags produced by the cmp at the top are consumed by the blo and beq
 * further down. None of the NEON loads or AES instructions in between
 * writes the condition flags, so the statement order must be kept.
 *
 * Leaves through bx lr on both exit paths. Overwrites ip and q10-q13.
 */
	.macro		do_block, dround, fround
	cmp		r3, #12			@ which key size?
	vld1.8		{q10-q11}, [ip]!	@ round keys 2 and 3
	\dround		q8, q9
	vld1.8		{q12-q13}, [ip]!	@ round keys 4 and 5
	\dround		q10, q11
	vld1.8		{q10-q11}, [ip]!	@ round keys 6 and 7
	\dround		q12, q13
	vld1.8		{q12-q13}, [ip]!	@ round keys 8 and 9
	\dround		q10, q11
	blo		0f			@ below 12 rounds: AES-128 (10 rounds)
	vld1.8		{q10-q11}, [ip]!	@ round keys 10 and 11
	\dround		q12, q13
	beq		1f			@ exactly 12 rounds: AES-192
	vld1.8		{q12-q13}, [ip]		@ round keys 12 and 13
	\dround		q10, q11
0:	\fround		q12, q13, q14
	bx		lr

1:	\fround		q10, q11, q14
	bx		lr
	.endm
114
/*
 * Internal, non-AAPCS compliant functions that implement the core AES
 * transforms. These should preserve all registers except q0 - q2 and ip
 * (q10 - q13 are additionally overwritten with intermediate round keys).
 * Arguments:
 *   q0        : first in/output block
 *   q1        : second in/output block (_3x version only)
 *   q2        : third in/output block (_3x version only)
 *   q8        : first round key
 *   q9        : second round key
 *   q14       : final round key
 *   r2        : address of round key array
 *   r3        : number of rounds
 */
/*
 * aes_encrypt - encrypt the block in q0 in place (internal convention:
 * q8/q9/q14 preloaded with the first two and the final round key, r2 =
 * round key array, r3 = number of rounds). Overwrites ip and q10-q13.
 * Starts on a 64-byte boundary.
 *
 * .Laes_encrypt_tweak is a second way in for a caller that has already
 * pointed ip at the round key at index 2 of a different key schedule.
 */
	.p2align	6
aes_encrypt:
	add		ip, r2, #32		@ ip = address of the 3rd round key
.Laes_encrypt_tweak:
	do_block	enc_dround, enc_fround
ENDPROC(aes_encrypt)
134
/*
 * aes_decrypt - decrypt the block in q0 in place (internal convention:
 * q8/q9/q14 preloaded with the first two and the final round key, r2 =
 * round key array, r3 = number of rounds). Overwrites ip and q10-q13.
 * Starts on a 64-byte boundary.
 */
	.p2align	6
aes_decrypt:
	add		ip, r2, #32		@ ip = address of the 3rd round key
	do_block	dec_dround, dec_fround
ENDPROC(aes_decrypt)
140
/*
 * aes_encrypt_3x - encrypt the three blocks in q0-q2 in place (internal
 * convention: q8/q9/q14 preloaded with the first two and the final round
 * key, r2 = round key array, r3 = number of rounds). Overwrites ip and
 * q10-q13. Starts on a 64-byte boundary.
 */
	.p2align	6
aes_encrypt_3x:
	add		ip, r2, #32		@ ip = address of the 3rd round key
	do_block	enc_dround_3x, enc_fround_3x
ENDPROC(aes_encrypt_3x)
146
/*
 * aes_decrypt_3x - decrypt the three blocks in q0-q2 in place (internal
 * convention: q8/q9/q14 preloaded with the first two and the final round
 * key, r2 = round key array, r3 = number of rounds). Overwrites ip and
 * q10-q13. Starts on a 64-byte boundary.
 */
	.p2align	6
aes_decrypt_3x:
	add		ip, r2, #32		@ ip = address of the 3rd round key
	do_block	dec_dround_3x, dec_fround_3x
ENDPROC(aes_decrypt_3x)
152
/*
 * prepare_key - preload the round keys that stay resident across blocks.
 *   rk:     register holding the address of the round key array
 *   rounds: register holding the number of rounds
 * Loads the first two round keys into q8/q9 and the 16-byte key found at
 * rk + 16 * rounds into q14. Overwrites ip. The rk register must not be ip.
 */
	.macro		prepare_key, rk, rounds
	vld1.8		{q8-q9}, [\rk]		@ first two round keys
	add		ip, \rk, \rounds, lsl #4
	vld1.8		{q14}, [ip]		@ last round key
	.endm
158
/*
 * ce_aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
 *		      int blocks)
 * ce_aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
 *		      int blocks)
 */
/*
 * ECB encryption: r0 = out, r1 = in, r2 = rk, r3 = rounds, blocks on the
 * stack. Blocks are processed three at a time while at least three remain,
 * then one at a time. r0 and r1 advance by 16 bytes per block.
 */
ENTRY(ce_aes_ecb_encrypt)
	push		{r4, lr}
	ldr		r4, [sp, #8]		@ r4 = blocks (first stack argument)
	prepare_key	r2, r3
.Lecb_enc_3x:
	subs		r4, r4, #3		@ claim three blocks
	bmi		.Lecb_enc_rest		@ fewer than three were left
	vld1.8		{q0-q1}, [r1]!
	vld1.8		{q2}, [r1]!
	bl		aes_encrypt_3x
	vst1.8		{q0-q1}, [r0]!
	vst1.8		{q2}, [r0]!
	b		.Lecb_enc_3x
.Lecb_enc_rest:
	adds		r4, r4, #3		@ undo the failed claim
	beq		.Lecb_enc_done
.Lecb_enc_1x:
	vld1.8		{q0}, [r1]!
	bl		aes_encrypt
	vst1.8		{q0}, [r0]!
	subs		r4, r4, #1
	bne		.Lecb_enc_1x
.Lecb_enc_done:
	pop		{r4, pc}
ENDPROC(ce_aes_ecb_encrypt)
190
/*
 * ECB decryption: r0 = out, r1 = in, r2 = rk, r3 = rounds, blocks on the
 * stack. Blocks are processed three at a time while at least three remain,
 * then one at a time. r0 and r1 advance by 16 bytes per block.
 */
ENTRY(ce_aes_ecb_decrypt)
	push		{r4, lr}
	ldr		r4, [sp, #8]		@ r4 = blocks (first stack argument)
	prepare_key	r2, r3
.Lecb_dec_3x:
	subs		r4, r4, #3		@ claim three blocks
	bmi		.Lecb_dec_rest		@ fewer than three were left
	vld1.8		{q0-q1}, [r1]!
	vld1.8		{q2}, [r1]!
	bl		aes_decrypt_3x
	vst1.8		{q0-q1}, [r0]!
	vst1.8		{q2}, [r0]!
	b		.Lecb_dec_3x
.Lecb_dec_rest:
	adds		r4, r4, #3		@ undo the failed claim
	beq		.Lecb_dec_done
.Lecb_dec_1x:
	vld1.8		{q0}, [r1]!
	bl		aes_decrypt
	vst1.8		{q0}, [r0]!
	subs		r4, r4, #1
	bne		.Lecb_dec_1x
.Lecb_dec_done:
	pop		{r4, pc}
ENDPROC(ce_aes_ecb_decrypt)
216
/*
 * ce_aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
 *		      int blocks, u8 iv[])
 * ce_aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
 *		      int blocks, u8 iv[])
 */
/*
 * CBC encryption: r0 = out, r1 = in, r2 = rk, r3 = rounds, with blocks and
 * iv on the stack. Strictly one block at a time, because every block is
 * xored with the previous ciphertext block before it is encrypted.
 *
 * The chaining value lives in q0 for the whole run and is written back to
 * iv[] on exit. A block count of zero returns at once and leaves iv[]
 * holding the value it had on entry.
 */
ENTRY(ce_aes_cbc_encrypt)
	push		{r4-r6, lr}
	ldrd		r4, r5, [sp, #16]	@ r4 = blocks, r5 = iv
	vld1.8		{q0}, [r5]		@ q0 = chaining value
	teq		r4, #0			@ nothing to encrypt?
	beq		.Lcbcencout		@ the loop below tests at the bottom
	prepare_key	r2, r3
.Lcbcencloop:
	vld1.8		{q1}, [r1]!		@ get next pt block
	veor		q0, q0, q1		@ ..and xor with iv
	bl		aes_encrypt
	vst1.8		{q0}, [r0]!
	subs		r4, r4, #1
	bne		.Lcbcencloop
.Lcbcencout:
	vst1.8		{q0}, [r5]		@ last ct block is the next iv
	pop		{r4-r6, pc}
ENDPROC(ce_aes_cbc_encrypt)
238
/*
 * CBC decryption: r0 = out, r1 = in, r2 = rk, r3 = rounds, with blocks and
 * iv on the stack. Decryption of different blocks is independent, so three
 * blocks are handled per iteration while at least three remain.
 *
 * q6 always holds the chaining value, i.e. the iv or the previous
 * ciphertext block. It is written back to iv[] on exit.
 */
ENTRY(ce_aes_cbc_decrypt)
	push		{r4-r6, lr}
	ldrd		r4, r5, [sp, #16]	@ r4 = blocks, r5 = iv
	vld1.8		{q6}, [r5]		@ q6 = chaining value
	prepare_key	r2, r3
.Lcbc_dec_3x:
	subs		r4, r4, #3		@ claim three blocks
	bmi		.Lcbc_dec_rest
	vld1.8		{q0-q1}, [r1]!
	vld1.8		{q2}, [r1]!
	vmov		q3, q0			@ keep copies of the ct blocks, they
	vmov		q4, q1			@ are the chaining values for the
	vmov		q5, q2			@ blocks that follow them
	bl		aes_decrypt_3x
	veor		q0, q0, q6		@ block 0 chains from the old value
	veor		q1, q1, q3		@ block 1 chains from ct block 0
	veor		q2, q2, q4		@ block 2 chains from ct block 1
	vmov		q6, q5			@ ct block 2 chains into the next round
	vst1.8		{q0-q1}, [r0]!
	vst1.8		{q2}, [r0]!
	b		.Lcbc_dec_3x
.Lcbc_dec_rest:
	adds		r4, r4, #3		@ undo the failed claim
	beq		.Lcbc_dec_done
	vmov		q15, q14		@ keep a clean copy of the last round key
.Lcbc_dec_1x:
	vld1.8		{q0}, [r1]!		@ next ct block
	veor		q14, q15, q6		@ fold the chaining value into the last
						@ key so the final xor applies both
	vmov		q6, q0			@ this ct block chains into the next one
	bl		aes_decrypt
	vst1.8		{q0}, [r0]!
	subs		r4, r4, #1
	bne		.Lcbc_dec_1x
.Lcbc_dec_done:
	vst1.8		{q6}, [r5]		@ hand the next iv back to the caller
	pop		{r4-r6, pc}
ENDPROC(ce_aes_cbc_decrypt)
276
/*
 * ce_aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
 *		      int blocks, u8 ctr[])
 *
 * r0 = out, r1 = in, r2 = rk, r3 = rounds, with blocks and ctr on the stack.
 * q6 holds the counter block. r6 shadows its last 32-bit word in byte
 * swapped form so that it can be incremented with ordinary integer adds.
 *
 * If adding the block count to r6 would carry out of 32 bits, every block
 * goes through the one-block loop, which propagates the carry into the
 * other counter words. Otherwise three blocks are handled per iteration
 * while at least three remain.
 *
 * A block count that turns negative in the one-block loop marks a tail
 * block: the key stream block is stored to out without reading in.
 * The updated counter block is written back to ctr[] on exit.
 */
ENTRY(ce_aes_ctr_encrypt)
	push		{r4-r6, lr}
	ldrd		r4, r5, [sp, #16]	@ r4 = blocks, r5 = ctr
	vld1.8		{q6}, [r5]		@ q6 = counter block
	prepare_key	r2, r3
	vmov		r6, s27			@ last word of the counter block ..
	rev		r6, r6			@ .. byte swapped into r6
	cmn		r6, r4			@ does r6 + blocks carry out?
	bcs		.Lctr_1x		@ yes: use the carry-aware loop
.Lctr_3x:
	subs		r4, r4, #3		@ claim three blocks
	bmi		.Lctr_rest
	add		r6, r6, #1
	vmov		q0, q6			@ block 0 uses the counter as it is
	vmov		q1, q6
	rev		ip, r6
	add		r6, r6, #1
	vmov		q2, q6
	vmov		s7, ip			@ block 1: last word = counter + 1
	rev		ip, r6
	add		r6, r6, #1
	vmov		s11, ip			@ block 2: last word = counter + 2
	vld1.8		{q3-q4}, [r1]!
	vld1.8		{q5}, [r1]!
	bl		aes_encrypt_3x
	veor		q0, q0, q3
	veor		q1, q1, q4
	veor		q2, q2, q5
	rev		ip, r6
	vst1.8		{q0-q1}, [r0]!
	vst1.8		{q2}, [r0]!
	vmov		s27, ip			@ counter block now at counter + 3
	b		.Lctr_3x
.Lctr_rest:
	adds		r4, r4, #3		@ undo the failed claim
	beq		.Lctr_done
.Lctr_1x:
	vmov		q0, q6
	bl		aes_encrypt

	adds		r6, r6, #1		@ increment BE ctr, carry set on wrap
	rev		ip, r6
	vmov		s27, ip			@ rev and vmov leave the flags alone
	bcs		.Lctr_carry

.Lctr_carried:
	subs		r4, r4, #1
	bmi		.Lctr_tail		@ blocks < 0 means tail block
	vld1.8		{q3}, [r1]!
	veor		q3, q0, q3
	vst1.8		{q3}, [r0]!
	bne		.Lctr_1x		@ still testing the subs result

.Lctr_done:
	vst1.8		{q6}, [r5]		@ return next CTR value
	pop		{r4-r6, pc}

.Lctr_tail:
	vst1.8		{q0}, [r0, :64]		@ return the key stream
	b		.Lctr_done

.Lctr_carry:
	.irp		sreg, s26, s25, s24
	vmov		ip, \sreg		@ load next word of ctr
	rev		ip, ip			@ ... to handle the carry
	adds		ip, ip, #1
	rev		ip, ip
	vmov		\sreg, ip
	bcc		.Lctr_carried		@ no further carry: done
	.endr
	b		.Lctr_carried
ENDPROC(ce_aes_ctr_encrypt)
353
/*
 * ce_aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
 *		      int blocks, u8 iv[], u8 const rk2[], int first)
 * ce_aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
 *		      int blocks, u8 iv[], u8 const rk2[], int first)
 */
360
/*
 * next_tweak - derive the next XTS tweak by multiplying the current one
 * by x, treating the q register as a 128-bit value made of two 64-bit
 * lanes.
 *   out:   q register receiving the new tweak (may be the same as in)
 *   in:    q register holding the current tweak
 *   const: q register holding 1 in the low lane and 0x87 in the high lane
 *   tmp:   scratch q register, must differ from out and in
 */
	.macro		next_tweak, out, in, const, tmp
	vshr.s64	\tmp, \in, #63		@ all ones in each lane whose top bit is set
	vand		\tmp, \tmp, \const	@ low lane: 1 or 0, high lane: 0x87 or 0
	vadd.u64	\out, \in, \in		@ shift each lane left by one bit
	vext.8		\tmp, \tmp, \tmp, #8	@ swap the two lanes of tmp
	veor		\out, \out, \tmp	@ carry into high lane, 0x87 into low lane
	.endm
368
/*
 * Constant pair consumed by next_tweak, followed by the prologue shared by
 * the two XTS entry points.
 *
 * ce_aes_xts_init must be entered with bl directly after the caller has
 * pushed {r4-r6, lr}, because the stack arguments are read at fixed
 * offsets from sp.
 *
 * On return:
 *   q7 = { 1, 0x87 } as two 64-bit lanes
 *   r4 = blocks, r5 = iv
 *   q0 = iv[] encrypted with rk2 when the "first" argument equals 1,
 *        otherwise iv[] exactly as loaded
 *   r6 = address of rk2 when "first" equals 1 (the callers then test it
 *        against zero), otherwise the value of "first"
 * In the "first" == 1 case q8, q9, q14, q10-q13 and ip are overwritten with
 * material from rk2, so the caller has to reload its own key afterwards.
 */
	.p2align	3
.Lxts_mul_x:
	.quad		1, 0x87

ce_aes_xts_init:
	vldr		d14, .Lxts_mul_x	@ low lane of q7
	vldr		d15, .Lxts_mul_x + 8	@ high lane of q7

	ldrd		r4, r5, [sp, #16]	@ r4 = blocks, r5 = iv
	ldr		r6, [sp, #28]		@ r6 = first
	vld1.8		{q0}, [r5]		@ load iv
	teq		r6, #1			@ start of a block?
	bxne		lr			@ no: q0 is used as loaded

	@ Encrypt the IV in q0 with the second AES key. This should only
	@ be done at the start of a block.
	ldr		r6, [sp, #24]		@ r6 = address of AES key 2
	prepare_key	r6, r3
	add		ip, r6, #32		@ 3rd round key of key 2
	b		.Laes_encrypt_tweak	@ tail call, returns to our caller
ENDPROC(ce_aes_xts_init)
390
/*
 * XTS encryption: r0 = out, r1 = in, r2 = rk1, r3 = rounds, with blocks,
 * iv, rk2 and first on the stack.
 *
 * q3 holds the tweak for the next block, q4 and q5 the two tweaks after it
 * inside the three-block loop, q7 the next_tweak constant and q6 is
 * scratch. Every block is xored with its tweak both before and after the
 * AES transform. The tweak in q3 at exit is written back to iv[].
 */
ENTRY(ce_aes_xts_encrypt)
	push		{r4-r6, lr}

	bl		ce_aes_xts_init		@ r4 = blocks, r5 = iv, q0 = tweak
	prepare_key	r2, r3
	vmov		q3, q0

	teq		r6, #0			@ nonzero: use q0 as the tweak as it is
	bne		.Lxts_enc_3x

.Lxts_enc_3x_next:
	next_tweak	q3, q3, q7, q6
.Lxts_enc_3x:
	subs		r4, r4, #3		@ claim three blocks
	bmi		.Lxts_enc_rest
	vld1.8		{q0-q1}, [r1]!		@ get 3 pt blocks
	vld1.8		{q2}, [r1]!
	next_tweak	q4, q3, q7, q6
	veor		q0, q0, q3
	next_tweak	q5, q4, q7, q6
	veor		q1, q1, q4
	veor		q2, q2, q5
	bl		aes_encrypt_3x
	veor		q0, q0, q3
	veor		q1, q1, q4
	veor		q2, q2, q5
	vst1.8		{q0-q1}, [r0]!		@ write 3 ct blocks
	vst1.8		{q2}, [r0]!
	vmov		q3, q5			@ last tweak used so far
	teq		r4, #0
	bne		.Lxts_enc_3x_next	@ more input: advance the tweak first
	b		.Lxts_enc_done
.Lxts_enc_rest:
	adds		r4, r4, #3		@ undo the failed claim
	beq		.Lxts_enc_done
.Lxts_enc_1x:
	vld1.8		{q0}, [r1]!
	veor		q0, q0, q3
	bl		aes_encrypt
	veor		q0, q0, q3
	vst1.8		{q0}, [r0]!
	subs		r4, r4, #1
	beq		.Lxts_enc_done
	next_tweak	q3, q3, q7, q6
	b		.Lxts_enc_1x
.Lxts_enc_done:
	vst1.8		{q3}, [r5]		@ store the tweak back into iv[]
	pop		{r4-r6, pc}
ENDPROC(ce_aes_xts_encrypt)
440
441
/*
 * XTS decryption: r0 = out, r1 = in, r2 = rk1, r3 = rounds, with blocks,
 * iv, rk2 and first on the stack.
 *
 * q3 holds the tweak for the next block, q4 and q5 the two tweaks after it
 * inside the three-block loop, q7 the next_tweak constant and q6 is
 * scratch. Every block is xored with its tweak both before and after the
 * AES transform. The tweak in q3 at exit is written back to iv[].
 *
 * aes_decrypt points ip at the 3rd round key itself, so the one-block loop
 * does not set ip up before the call.
 */
ENTRY(ce_aes_xts_decrypt)
	push		{r4-r6, lr}

	bl		ce_aes_xts_init		@ run shared prologue
	prepare_key	r2, r3
	vmov		q3, q0

	teq		r6, #0			@ start of a block?
	bne		.Lxtsdec3x

.Lxtsdecloop3x:
	next_tweak	q3, q3, q7, q6
.Lxtsdec3x:
	subs		r4, r4, #3
	bmi		.Lxtsdec1x
	vld1.8		{q0-q1}, [r1]!		@ get 3 ct blocks
	vld1.8		{q2}, [r1]!
	next_tweak	q4, q3, q7, q6
	veor		q0, q0, q3
	next_tweak	q5, q4, q7, q6
	veor		q1, q1, q4
	veor		q2, q2, q5
	bl		aes_decrypt_3x
	veor		q0, q0, q3
	veor		q1, q1, q4
	veor		q2, q2, q5
	vst1.8		{q0-q1}, [r0]!		@ write 3 pt blocks
	vst1.8		{q2}, [r0]!
	vmov		q3, q5
	teq		r4, #0
	beq		.Lxtsdecout
	b		.Lxtsdecloop3x
.Lxtsdec1x:
	adds		r4, r4, #3
	beq		.Lxtsdecout
.Lxtsdecloop:
	vld1.8		{q0}, [r1]!
	veor		q0, q0, q3
	bl		aes_decrypt
	veor		q0, q0, q3
	vst1.8		{q0}, [r0]!
	subs		r4, r4, #1
	beq		.Lxtsdecout
	next_tweak	q3, q3, q7, q6
	b		.Lxtsdecloop
.Lxtsdecout:
	vst1.8		{q3}, [r5]		@ store the tweak back into iv[]
	pop		{r4-r6, pc}
ENDPROC(ce_aes_xts_decrypt)
492
/*
 * u32 ce_aes_sub(u32 input) - use the aese instruction to perform the
 *                             AES sbox substitution on each byte in
 *                             'input'
 *
 * The input word is copied into all four 32-bit lanes and applied as the
 * round key to an all-zero state. Because every lane is identical, the
 * ShiftRows step of aese only exchanges equal bytes, and lane 0 ends up
 * holding the four substituted bytes of input. Overwrites q0 and q1.
 */
ENTRY(ce_aes_sub)
	veor		q0, q0, q0		@ state = 0
	vdup.32		q1, r0			@ input in every 32-bit lane
	aese.8		q0, q1
	vmov		r0, s0			@ return lane 0
	bx		lr
ENDPROC(ce_aes_sub)
505
/*
 * void ce_aes_invert(u8 *dst, u8 *src) - perform the Inverse MixColumns
 *                                        operation on round key *src
 *
 * r0 = dst, r1 = src. Exactly one 16-byte round key is read and one is
 * written. The load completes before the store, so dst may equal src.
 * Overwrites q0.
 */
ENTRY(ce_aes_invert)
	vld1.8		{q0}, [r1]		@ fetch the round key
	aesimc.8	q0, q0
	vst1.8		{q0}, [r0]		@ store the transformed key
	bx		lr
ENDPROC(ce_aes_invert)
515ENDPROC(ce_aes_invert)