blob: 37d9b13dfd852a2b1c4fd27aad88f3abade003c0 [file] [log] [blame]
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * AES-NI + SSE2 implementation of AEGIS-256
 *
 * Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com>
 * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
 */
8
9#include <linux/linkage.h>
10#include <asm/frame.h>
11
/* The six 128-bit AEGIS-256 state words live in %xmm0-%xmm5. */
#define STATE0	%xmm0
#define STATE1	%xmm1
#define STATE2	%xmm2
#define STATE3	%xmm3
#define STATE4	%xmm4
#define STATE5	%xmm5

/* Current message block and vector scratch registers. */
#define MSG	%xmm6
#define T0	%xmm7
#define T1	%xmm8
#define T2	%xmm9
#define T3	%xmm10

/* Integer argument registers as used by the ad/enc/dec routines. */
#define STATEP	%rdi
#define LEN	%rsi
#define SRC	%rdx
#define DST	%rcx
28
/*
 * Initialization constants.  Read as one 32-byte string, const_0 followed
 * by const_1 is the Fibonacci sequence reduced modulo 256.
 */
.section	.rodata.cst16.aegis256_const, "aM", @progbits, 32
.align 16
.Laegis256_const_0:
	.byte	0x00, 0x01, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d
	.byte	0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62
.Laegis256_const_1:
	.byte	0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1
	.byte	0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd

/*
 * Byte indices 0..15; compared against the byte count in
 * crypto_aegis256_aesni_dec_tail to build a per-byte mask.
 */
.section	.rodata.cst16.aegis256_counter, "aM", @progbits, 16
.align 16
.Laegis256_counter:
	.byte	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07
	.byte	0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f

.text
45
/*
 * __load_partial: internal ABI
 *
 * Assemble a partial block in MSG from 1-, 2-, 4- and 8-byte pieces,
 * one piece per set bit in the low four bits of LEN.  Each piece is read
 * from the offset given by the LEN bits above it, so the pieces end up in
 * memory order and the unused high bytes of MSG are zero.
 *
 * input:
 *   LEN - bytes
 *   SRC - src
 * output:
 *   MSG - message block
 * changed:
 *   T0
 *   %r8
 *   %r9
 */
__load_partial:
	pxor	MSG, MSG
	xor	%r9d, %r9d		/* %r9 collects the low 8 bytes */

	/* bit 0: one byte at offset (LEN & 0x1E) */
	mov	LEN, %r8
	and	$0x1, %r8
	jz	.Lld_partial_1

	mov	LEN, %r8
	and	$0x1E, %r8
	add	SRC, %r8
	mov	(%r8), %r9b

.Lld_partial_1:
	/* bit 1: two bytes at offset (LEN & 0x1C), placed below the byte */
	mov	LEN, %r8
	and	$0x2, %r8
	jz	.Lld_partial_2

	shl	$0x10, %r9
	mov	LEN, %r8
	and	$0x1C, %r8
	add	SRC, %r8
	mov	(%r8), %r9w

.Lld_partial_2:
	/* bit 2: four bytes at offset (LEN & 0x18), placed below the rest */
	mov	LEN, %r8
	and	$0x4, %r8
	jz	.Lld_partial_4

	shl	$32, %r9
	mov	LEN, %r8
	and	$0x18, %r8
	add	SRC, %r8
	mov	(%r8), %r8d		/* 32-bit load zero-extends into %r8 */
	xor	%r8, %r9

.Lld_partial_4:
	movq	%r9, MSG		/* also clears the high half of MSG */

	/* bit 3: eight bytes at offset (LEN & 0x10) become the low half */
	mov	LEN, %r8
	and	$0x8, %r8
	jz	.Lld_partial_8

	pslldq	$8, MSG
	mov	LEN, %r8
	and	$0x10, %r8
	add	SRC, %r8
	movq	(%r8), T0
	pxor	T0, MSG

.Lld_partial_8:
	ret
ENDPROC(__load_partial)
111
/*
 * __store_partial: internal ABI
 *
 * Write the low LEN bytes of T0 to DST in 8-, 4-, 2- and 1-byte pieces.
 *
 * input:
 *   LEN - bytes (only the low 32 bits are used)
 *   DST - dst
 *   T0  - message block
 * changed:
 *   T0
 *   %r8
 *   %r9
 *   %r10
 */
__store_partial:
	/*
	 * The exported callers document the length as 'unsigned int', so the
	 * upper half of LEN is not guaranteed to be zero.  Work on a
	 * zero-extended copy; otherwise stray high bits would make every
	 * size check below pass and 15 bytes would be written.
	 */
	mov	LEN, %r8
	mov	%r8d, %r8d		/* 32-bit move clears bits 63:32 */
	mov	DST, %r9

	movq	T0, %r10		/* %r10 = low 8 bytes still to store */

	cmp	$8, %r8
	jb	.Lst_partial_8

	mov	%r10, (%r9)
	psrldq	$8, T0			/* bring the high half down */
	movq	T0, %r10

	sub	$8, %r8
	add	$8, %r9

.Lst_partial_8:
	cmp	$4, %r8
	jb	.Lst_partial_4

	mov	%r10d, (%r9)
	shr	$32, %r10

	sub	$4, %r8
	add	$4, %r9

.Lst_partial_4:
	cmp	$2, %r8
	jb	.Lst_partial_2

	mov	%r10w, (%r9)
	shr	$0x10, %r10

	sub	$2, %r8
	add	$2, %r9

.Lst_partial_2:
	cmp	$1, %r8
	jb	.Lst_partial_1

	mov	%r10b, (%r9)

.Lst_partial_1:
	ret
ENDPROC(__store_partial)
169
/*
 * One state update without message injection.  Each register receives
 * one AES round of itself keyed with its predecessor (STATE5 wraps around
 * to STATE4 through the copy saved in T0).  The result is rotated by one
 * register: the new first state word is left in STATE5, the new second
 * word in STATE0, and so on.  Clobbers T0.
 */
.macro update
	movdqa	STATE5, T0
	aesenc	STATE0, STATE5
	aesenc	STATE1, STATE0
	aesenc	STATE2, STATE1
	aesenc	STATE3, STATE2
	aesenc	STATE4, STATE3
	aesenc	T0,     STATE4
.endm

/* Run one update, then XOR message \m into register \dst. */
.macro update_xor dst m
	update
	pxor	\m, \dst
.endm

/*
 * updateN: state update for a state that has already been rotated N
 * times.  The message is XORed into whichever register holds the new
 * first state word after this step.  \m must not be T0, which the update
 * overwrites.
 */
.macro update0 m
	update_xor STATE5 \m
.endm

.macro update1 m
	update_xor STATE4 \m
.endm

.macro update2 m
	update_xor STATE3 \m
.endm

.macro update3 m
	update_xor STATE2 \m
.endm

.macro update4 m
	update_xor STATE1 \m
.endm

.macro update5 m
	update_xor STATE0 \m
.endm
209
/* Read the six state words from STATEP (unaligned loads) in order. */
.macro state_load
	movdqu	0x00(STATEP), STATE0
	movdqu	0x10(STATEP), STATE1
	movdqu	0x20(STATEP), STATE2
	movdqu	0x30(STATEP), STATE3
	movdqu	0x40(STATEP), STATE4
	movdqu	0x50(STATEP), STATE5
.endm

/*
 * Write the state back to STATEP.  The last argument goes to offset 0
 * and the first five follow it, which undoes the one-register rotation
 * introduced by the final update step.
 */
.macro state_store s0 s1 s2 s3 s4 s5
	movdqu	\s5, 0x00(STATEP)
	movdqu	\s0, 0x10(STATEP)
	movdqu	\s1, 0x20(STATEP)
	movdqu	\s2, 0x30(STATEP)
	movdqu	\s3, 0x40(STATEP)
	movdqu	\s4, 0x50(STATEP)
.endm

/* state_storeN: store the state when the last update run was updateN. */
.macro state_store0
	state_store STATE0 STATE1 STATE2 STATE3 STATE4 STATE5
.endm

.macro state_store1
	state_store STATE5 STATE0 STATE1 STATE2 STATE3 STATE4
.endm

.macro state_store2
	state_store STATE4 STATE5 STATE0 STATE1 STATE2 STATE3
.endm

.macro state_store3
	state_store STATE3 STATE4 STATE5 STATE0 STATE1 STATE2
.endm

.macro state_store4
	state_store STATE2 STATE3 STATE4 STATE5 STATE0 STATE1
.endm

.macro state_store5
	state_store STATE1 STATE2 STATE3 STATE4 STATE5 STATE0
.endm
251
/*
 * void crypto_aegis256_aesni_init(void *state, const void *key, const void *iv);
 *
 * Build the initial state from a 32-byte key and a 32-byte IV and write
 * it to 'state'.  The key is read with aligned loads (movdqa), so 'key'
 * must be 16-byte aligned; the IV is read with unaligned loads.
 */
ENTRY(crypto_aegis256_aesni_init)
	FRAME_BEGIN

	/* load key: */
	movdqa	0x00(%rsi), MSG
	movdqa	0x10(%rsi), T1
	movdqa	MSG, STATE4
	movdqa	T1, STATE5

	/* load IV and mix it with the key: */
	movdqu	0x00(%rdx), T2
	movdqu	0x10(%rdx), T3
	pxor	MSG, T2
	pxor	T1, T3
	movdqa	T2, STATE0
	movdqa	T3, STATE1

	/* load the constants (RIP-relative, position independent): */
	movdqa	.Laegis256_const_0(%rip), STATE3
	movdqa	.Laegis256_const_1(%rip), STATE2
	pxor	STATE3, STATE4
	pxor	STATE2, STATE5

	/*
	 * update 16 times, cycling through the two key halves (MSG, T1) and
	 * the two key^IV halves (T2, T3):
	 */
	update0	MSG
	update1	T1
	update2	T2
	update3	T3
	update4	MSG
	update5	T1
	update0	T2
	update1	T3
	update2	MSG
	update3	T1
	update4	T2
	update5	T3
	update0	MSG
	update1	T1
	update2	T2
	update3	T3

	/* the last step was update3, so store with the matching rotation */
	state_store3

	FRAME_END
	ret
ENDPROC(crypto_aegis256_aesni_init)
301
/*
 * ad_block: absorb the 16-byte block at SRC + \i * 0x10 into the state.
 *   \a - 'a' for aligned loads (movdqa), 'u' for unaligned (movdqu)
 *   \i - block index within the unrolled loop (0..5); selects update\i
 *
 * Subtracts 0x10 from LEN and leaves through .Lad_out_\i when fewer than
 * 0x10 bytes remain.  LEN is a byte count, so the comparison is unsigned.
 */
.macro ad_block a i
	movdq\a	(\i * 0x10)(SRC), MSG
	update\i	MSG
	sub	$0x10, LEN
	cmp	$0x10, LEN
	jb	.Lad_out_\i
.endm
309
/*
 * void crypto_aegis256_aesni_ad(void *state, unsigned int length,
 *                               const void *data);
 *
 * Absorb every complete 16-byte block of 'data' into 'state'.  A trailing
 * partial block (length % 16 bytes) is not processed here.  When length
 * is below 16 the state is neither loaded nor written.
 */
ENTRY(crypto_aegis256_aesni_ad)
	FRAME_BEGIN

	/*
	 * 'length' is a 32-bit argument, so the upper half of LEN (%rsi) is
	 * not guaranteed to be zero on entry.  Clear it before LEN is used
	 * as a 64-bit byte counter.
	 */
	mov	%esi, %esi

	cmp	$0x10, LEN
	jb	.Lad_out

	state_load

	/* pick the aligned or unaligned loop based on SRC */
	mov	SRC, %r8
	and	$0xf, %r8
	jnz	.Lad_u_loop

.align 8
.Lad_a_loop:
	ad_block a 0
	ad_block a 1
	ad_block a 2
	ad_block a 3
	ad_block a 4
	ad_block a 5

	add	$0x60, SRC
	jmp	.Lad_a_loop

.align 8
.Lad_u_loop:
	ad_block u 0
	ad_block u 1
	ad_block u 2
	ad_block u 3
	ad_block u 4
	ad_block u 5

	add	$0x60, SRC
	jmp	.Lad_u_loop

	/* exit N: the last step was updateN, store with matching rotation */
.Lad_out_0:
	state_store0
	FRAME_END
	ret

.Lad_out_1:
	state_store1
	FRAME_END
	ret

.Lad_out_2:
	state_store2
	FRAME_END
	ret

.Lad_out_3:
	state_store3
	FRAME_END
	ret

.Lad_out_4:
	state_store4
	FRAME_END
	ret

.Lad_out_5:
	state_store5
	FRAME_END
	ret

.Lad_out:
	FRAME_END
	ret
ENDPROC(crypto_aegis256_aesni_ad)
384
/*
 * crypt: XOR the keystream block into \m:
 *   \m ^= (\s2 & \s3) ^ \s1 ^ \s4 ^ \s5
 * \s0 is accepted only to keep the argument list in state order.
 * Clobbers T3, so \m must not be T3.
 */
.macro crypt m s0 s1 s2 s3 s4 s5
	movdqa	\s2, T3
	pand	\s3, T3
	pxor	T3,  \m
	pxor	\s1, \m
	pxor	\s4, \m
	pxor	\s5, \m
.endm

/*
 * cryptN: keystream for a state that has been rotated N times, i.e.
 * after N update steps since the state was loaded in register order.
 */
.macro crypt0 m
	crypt \m STATE0 STATE1 STATE2 STATE3 STATE4 STATE5
.endm

.macro crypt1 m
	crypt \m STATE5 STATE0 STATE1 STATE2 STATE3 STATE4
.endm

.macro crypt2 m
	crypt \m STATE4 STATE5 STATE0 STATE1 STATE2 STATE3
.endm

.macro crypt3 m
	crypt \m STATE3 STATE4 STATE5 STATE0 STATE1 STATE2
.endm

.macro crypt4 m
	crypt \m STATE2 STATE3 STATE4 STATE5 STATE0 STATE1
.endm

.macro crypt5 m
	crypt \m STATE1 STATE2 STATE3 STATE4 STATE5 STATE0
.endm
417
/*
 * encrypt_block: encrypt the 16-byte block at SRC + \i * 0x10 into
 * DST + \i * 0x10 and absorb the plaintext into the state.
 *   \a - 'a' for aligned accesses (movdqa), 'u' for unaligned (movdqu)
 *   \i - block index within the unrolled loop (0..5)
 *
 * Subtracts 0x10 from LEN and leaves through .Lenc_out_\i when fewer
 * than 0x10 bytes remain.  LEN is a byte count, so the comparison is
 * unsigned.
 */
.macro encrypt_block a i
	movdq\a	(\i * 0x10)(SRC), MSG
	movdqa	MSG, T0			/* keep the plaintext in MSG */
	crypt\i	T0
	movdq\a	T0, (\i * 0x10)(DST)

	update\i	MSG

	sub	$0x10, LEN
	cmp	$0x10, LEN
	jb	.Lenc_out_\i
.endm

/*
 * decrypt_block: decrypt the 16-byte block at SRC + \i * 0x10 into
 * DST + \i * 0x10 and absorb the recovered plaintext into the state.
 * Arguments as for encrypt_block; leaves through .Ldec_out_\i.
 */
.macro decrypt_block a i
	movdq\a	(\i * 0x10)(SRC), MSG
	crypt\i	MSG			/* MSG now holds the plaintext */
	movdq\a	MSG, (\i * 0x10)(DST)

	update\i	MSG

	sub	$0x10, LEN
	cmp	$0x10, LEN
	jb	.Ldec_out_\i
.endm
442
/*
 * void crypto_aegis256_aesni_enc(void *state, unsigned int length,
 *                                const void *src, void *dst);
 *
 * Encrypt every complete 16-byte block of 'src' into 'dst' and update
 * 'state'.  A trailing partial block (length % 16 bytes) is not processed
 * here.  When length is below 16 the state is neither loaded nor written.
 */
ENTRY(crypto_aegis256_aesni_enc)
	FRAME_BEGIN

	/*
	 * 'length' is a 32-bit argument, so the upper half of LEN (%rsi) is
	 * not guaranteed to be zero on entry.  Clear it before LEN is used
	 * as a 64-bit byte counter.
	 */
	mov	%esi, %esi

	cmp	$0x10, LEN
	jb	.Lenc_out

	state_load

	/* use the aligned loop only if both SRC and DST are 16-byte aligned */
	mov	SRC, %r8
	or	DST, %r8
	and	$0xf, %r8
	jnz	.Lenc_u_loop

.align 8
.Lenc_a_loop:
	encrypt_block a 0
	encrypt_block a 1
	encrypt_block a 2
	encrypt_block a 3
	encrypt_block a 4
	encrypt_block a 5

	add	$0x60, SRC
	add	$0x60, DST
	jmp	.Lenc_a_loop

.align 8
.Lenc_u_loop:
	encrypt_block u 0
	encrypt_block u 1
	encrypt_block u 2
	encrypt_block u 3
	encrypt_block u 4
	encrypt_block u 5

	add	$0x60, SRC
	add	$0x60, DST
	jmp	.Lenc_u_loop

	/* exit N: the last step was updateN, store with matching rotation */
.Lenc_out_0:
	state_store0
	FRAME_END
	ret

.Lenc_out_1:
	state_store1
	FRAME_END
	ret

.Lenc_out_2:
	state_store2
	FRAME_END
	ret

.Lenc_out_3:
	state_store3
	FRAME_END
	ret

.Lenc_out_4:
	state_store4
	FRAME_END
	ret

.Lenc_out_5:
	state_store5
	FRAME_END
	ret

.Lenc_out:
	FRAME_END
	ret
ENDPROC(crypto_aegis256_aesni_enc)
520
/*
 * void crypto_aegis256_aesni_enc_tail(void *state, unsigned int length,
 *                                     const void *src, void *dst);
 *
 * Encrypt one final partial block: gather 'length' bytes from 'src' with
 * __load_partial, XOR in the keystream, write 'length' bytes to 'dst'
 * with __store_partial, then absorb the zero-padded plaintext.
 */
ENTRY(crypto_aegis256_aesni_enc_tail)
	FRAME_BEGIN

	state_load

	/* MSG = zero-padded plaintext */
	call	__load_partial

	/* T0 = plaintext ^ keystream; MSG is kept for the state update */
	movdqa	MSG, T0
	crypt0	T0

	/* must come before update0, which overwrites T0 */
	call	__store_partial

	update0	MSG

	state_store0

	FRAME_END
	ret
ENDPROC(crypto_aegis256_aesni_enc_tail)
545
/*
 * void crypto_aegis256_aesni_dec(void *state, unsigned int length,
 *                                const void *src, void *dst);
 *
 * Decrypt every complete 16-byte block of 'src' into 'dst' and update
 * 'state'.  A trailing partial block (length % 16 bytes) is not processed
 * here.  When length is below 16 the state is neither loaded nor written.
 */
ENTRY(crypto_aegis256_aesni_dec)
	FRAME_BEGIN

	/*
	 * 'length' is a 32-bit argument, so the upper half of LEN (%rsi) is
	 * not guaranteed to be zero on entry.  Clear it before LEN is used
	 * as a 64-bit byte counter.
	 */
	mov	%esi, %esi

	cmp	$0x10, LEN
	jb	.Ldec_out

	state_load

	/* use the aligned loop only if both SRC and DST are 16-byte aligned */
	mov	SRC, %r8
	or	DST, %r8
	and	$0xF, %r8
	jnz	.Ldec_u_loop

.align 8
.Ldec_a_loop:
	decrypt_block a 0
	decrypt_block a 1
	decrypt_block a 2
	decrypt_block a 3
	decrypt_block a 4
	decrypt_block a 5

	add	$0x60, SRC
	add	$0x60, DST
	jmp	.Ldec_a_loop

.align 8
.Ldec_u_loop:
	decrypt_block u 0
	decrypt_block u 1
	decrypt_block u 2
	decrypt_block u 3
	decrypt_block u 4
	decrypt_block u 5

	add	$0x60, SRC
	add	$0x60, DST
	jmp	.Ldec_u_loop

	/* exit N: the last step was updateN, store with matching rotation */
.Ldec_out_0:
	state_store0
	FRAME_END
	ret

.Ldec_out_1:
	state_store1
	FRAME_END
	ret

.Ldec_out_2:
	state_store2
	FRAME_END
	ret

.Ldec_out_3:
	state_store3
	FRAME_END
	ret

.Ldec_out_4:
	state_store4
	FRAME_END
	ret

.Ldec_out_5:
	state_store5
	FRAME_END
	ret

.Ldec_out:
	FRAME_END
	ret
ENDPROC(crypto_aegis256_aesni_dec)
623
/*
 * void crypto_aegis256_aesni_dec_tail(void *state, unsigned int length,
 *                                     const void *src, void *dst);
 *
 * Decrypt one final partial block: gather 'length' bytes from 'src' with
 * __load_partial, XOR in the keystream, write 'length' bytes to 'dst'
 * with __store_partial, then absorb the plaintext with every byte at
 * index >= length forced to zero.
 */
ENTRY(crypto_aegis256_aesni_dec_tail)
	FRAME_BEGIN

	state_load

	/* decrypt message: */
	call	__load_partial

	crypt0	MSG

	movdqa	MSG, T0
	call	__store_partial

	/*
	 * mask with byte count: the four unpacks replicate the low byte of
	 * LEN into all 16 bytes of T0; the signed byte compare then yields
	 * 0xff exactly where the byte index is below the count.
	 */
	movq	LEN, T0
	punpcklbw	T0, T0
	punpcklbw	T0, T0
	punpcklbw	T0, T0
	punpcklbw	T0, T0
	movdqa	.Laegis256_counter(%rip), T1
	pcmpgtb	T1, T0
	pand	T0, MSG

	update0	MSG

	state_store0

	FRAME_END
	ret
ENDPROC(crypto_aegis256_aesni_dec_tail)
658
/*
 * void crypto_aegis256_aesni_final(void *state, void *tag_xor,
 *                                  u64 assoclen, u64 cryptlen);
 *
 * Run the finalization rounds and XOR the resulting tag into the 16
 * bytes at 'tag_xor'.  The state is loaded but not written back.
 */
ENTRY(crypto_aegis256_aesni_final)
	FRAME_BEGIN

	state_load

	/* prepare length block: low half = assoclen, high half = cryptlen */
	movq	%rcx, T0
	pslldq	$8, T0
	movq	%rdx, MSG
	pxor	T0, MSG
	psllq	$3, MSG		/* multiply by 8 (to get bit count) */

	/* mix in the fourth state word before any update is run */
	pxor	STATE3, MSG

	/* update state seven times with the same block: */
	update0	MSG
	update1	MSG
	update2	MSG
	update3	MSG
	update4	MSG
	update5	MSG
	update0	MSG

	/* xor tag: all six state words folded into the caller's buffer */
	movdqu	(%rsi), MSG

	pxor	STATE0, MSG
	pxor	STATE1, MSG
	pxor	STATE2, MSG
	pxor	STATE3, MSG
	pxor	STATE4, MSG
	pxor	STATE5, MSG

	movdqu	MSG, (%rsi)

	FRAME_END
	ret
ENDPROC(crypto_aegis256_aesni_final)