diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index cfb0010fa94..1a58ad89fdf 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile @@ -12,6 +12,7 @@ obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o obj-$(CONFIG_CRYPTO_SALSA20_X86_64) += salsa20-x86_64.o obj-$(CONFIG_CRYPTO_AES_NI_INTEL) += aesni-intel.o +obj-$(CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL) += ghash-clmulni-intel.o obj-$(CONFIG_CRYPTO_CRC32C_INTEL) += crc32c-intel.o @@ -24,3 +25,5 @@ twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o salsa20-x86_64-y := salsa20-x86_64-asm_64.o salsa20_glue.o aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o + +ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S index eb0566e8331..20bb0e1ac68 100644 --- a/arch/x86/crypto/aesni-intel_asm.S +++ b/arch/x86/crypto/aesni-intel_asm.S @@ -16,6 +16,7 @@ */ #include +#include .text @@ -122,103 +123,72 @@ ENTRY(aesni_set_key) movups 0x10(%rsi), %xmm2 # other user key movaps %xmm2, (%rcx) add $0x10, %rcx - # aeskeygenassist $0x1, %xmm2, %xmm1 # round 1 - .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x01 + AESKEYGENASSIST 0x1 %xmm2 %xmm1 # round 1 call _key_expansion_256a - # aeskeygenassist $0x1, %xmm0, %xmm1 - .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x01 + AESKEYGENASSIST 0x1 %xmm0 %xmm1 call _key_expansion_256b - # aeskeygenassist $0x2, %xmm2, %xmm1 # round 2 - .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x02 + AESKEYGENASSIST 0x2 %xmm2 %xmm1 # round 2 call _key_expansion_256a - # aeskeygenassist $0x2, %xmm0, %xmm1 - .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x02 + AESKEYGENASSIST 0x2 %xmm0 %xmm1 call _key_expansion_256b - # aeskeygenassist $0x4, %xmm2, %xmm1 # round 3 - .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x04 + AESKEYGENASSIST 0x4 %xmm2 %xmm1 # round 3 call _key_expansion_256a - # aeskeygenassist $0x4, %xmm0, %xmm1 - .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x04 + AESKEYGENASSIST 0x4 %xmm0 %xmm1 call _key_expansion_256b - # aeskeygenassist $0x8, %xmm2, %xmm1 # round 4 - .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x08 + AESKEYGENASSIST 0x8 %xmm2 %xmm1 # round 4 call _key_expansion_256a - # aeskeygenassist $0x8, %xmm0, %xmm1 - .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x08 + AESKEYGENASSIST 0x8 %xmm0 %xmm1 call _key_expansion_256b - # aeskeygenassist $0x10, %xmm2, %xmm1 # round 5 - .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x10 + AESKEYGENASSIST 0x10 %xmm2 %xmm1 # round 5 call _key_expansion_256a - # aeskeygenassist $0x10, %xmm0, %xmm1 - .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x10 + AESKEYGENASSIST 0x10 %xmm0 %xmm1 call _key_expansion_256b - # aeskeygenassist $0x20, %xmm2, %xmm1 # round 6 - .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x20 + AESKEYGENASSIST 0x20 %xmm2 %xmm1 # round 6 call _key_expansion_256a - # aeskeygenassist $0x20, %xmm0, %xmm1 - .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x20 + AESKEYGENASSIST 0x20 %xmm0 %xmm1 call _key_expansion_256b - # aeskeygenassist $0x40, %xmm2, %xmm1 # round 7 - .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x40 + AESKEYGENASSIST 0x40 %xmm2 %xmm1 # round 7 call _key_expansion_256a jmp .Ldec_key .Lenc_key192: movq 0x10(%rsi), %xmm2 # other user key - # aeskeygenassist $0x1, %xmm2, %xmm1 # round 1 - .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x01 + AESKEYGENASSIST 0x1 %xmm2 %xmm1 # round 1 call _key_expansion_192a - # aeskeygenassist $0x2, %xmm2, %xmm1 # round 2 - .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x02 + AESKEYGENASSIST 0x2 %xmm2 %xmm1 # round 2 call _key_expansion_192b - # aeskeygenassist $0x4, %xmm2, %xmm1 # round 3 - .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x04 + AESKEYGENASSIST 0x4 %xmm2 %xmm1 # round 3 call _key_expansion_192a - # aeskeygenassist $0x8, %xmm2, %xmm1 # round 4 - .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x08 + AESKEYGENASSIST 0x8 %xmm2 %xmm1 # round 4 call _key_expansion_192b - # aeskeygenassist $0x10, %xmm2, %xmm1 # round 5 - .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x10 + AESKEYGENASSIST 0x10 %xmm2 %xmm1 # round 5 call _key_expansion_192a - # aeskeygenassist $0x20, %xmm2, %xmm1 # round 6 - .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x20 + AESKEYGENASSIST 0x20 %xmm2 %xmm1 # round 6 call _key_expansion_192b - # aeskeygenassist $0x40, %xmm2, %xmm1 # round 7 - .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x40 + AESKEYGENASSIST 0x40 %xmm2 %xmm1 # round 7 call _key_expansion_192a - # aeskeygenassist $0x80, %xmm2, %xmm1 # round 8 - .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x80 + AESKEYGENASSIST 0x80 %xmm2 %xmm1 # round 8 call _key_expansion_192b jmp .Ldec_key .Lenc_key128: - # aeskeygenassist $0x1, %xmm0, %xmm1 # round 1 - .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x01 + AESKEYGENASSIST 0x1 %xmm0 %xmm1 # round 1 call _key_expansion_128 - # aeskeygenassist $0x2, %xmm0, %xmm1 # round 2 - .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x02 + AESKEYGENASSIST 0x2 %xmm0 %xmm1 # round 2 call _key_expansion_128 - # aeskeygenassist $0x4, %xmm0, %xmm1 # round 3 - .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x04 + AESKEYGENASSIST 0x4 %xmm0 %xmm1 # round 3 call _key_expansion_128 - # aeskeygenassist $0x8, %xmm0, %xmm1 # round 4 - .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x08 + AESKEYGENASSIST 0x8 %xmm0 %xmm1 # round 4 call _key_expansion_128 - # aeskeygenassist $0x10, %xmm0, %xmm1 # round 5 - .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x10 + AESKEYGENASSIST 0x10 %xmm0 %xmm1 # round 5 call _key_expansion_128 - # aeskeygenassist $0x20, %xmm0, %xmm1 # round 6 - .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x20 + AESKEYGENASSIST 0x20 %xmm0 %xmm1 # round 6 call _key_expansion_128 - # aeskeygenassist $0x40, %xmm0, %xmm1 # round 7 - .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x40 + AESKEYGENASSIST 0x40 %xmm0 %xmm1 # round 7 call _key_expansion_128 - # aeskeygenassist $0x80, %xmm0, %xmm1 # round 8 - .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x80 + AESKEYGENASSIST 0x80 %xmm0 %xmm1 # round 8 call _key_expansion_128 - # aeskeygenassist $0x1b, %xmm0, %xmm1 # round 9 - .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x1b + AESKEYGENASSIST 0x1b %xmm0 %xmm1 # round 9 call _key_expansion_128 - # aeskeygenassist $0x36, %xmm0, %xmm1 # round 10 - .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x36 + AESKEYGENASSIST 0x36 %xmm0 %xmm1 # round 10 call _key_expansion_128 .Ldec_key: sub $0x10, %rcx @@ -231,8 +201,7 @@ ENTRY(aesni_set_key) .align 4 .Ldec_key_loop: movaps (%rdi), %xmm0 - # aesimc %xmm0, %xmm1 - .byte 0x66, 0x0f, 0x38, 0xdb, 0xc8 + AESIMC %xmm0 %xmm1 movaps %xmm1, (%rsi) add $0x10, %rdi sub $0x10, %rsi @@ -274,51 +243,37 @@ _aesni_enc1: je .Lenc192 add $0x20, TKEYP movaps -0x60(TKEYP), KEY - # aesenc KEY, STATE - .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 + AESENC KEY STATE movaps -0x50(TKEYP), KEY - # aesenc KEY, STATE - .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 + AESENC KEY STATE .align 4 .Lenc192: movaps -0x40(TKEYP), KEY - # aesenc KEY, STATE - .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 + AESENC KEY STATE movaps -0x30(TKEYP), KEY - # aesenc KEY, STATE - .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 + AESENC KEY STATE .align 4 .Lenc128: movaps -0x20(TKEYP), KEY - # aesenc KEY, STATE - .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 + AESENC KEY STATE movaps -0x10(TKEYP), KEY - # aesenc KEY, STATE - .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 + AESENC KEY STATE movaps (TKEYP), KEY - # aesenc KEY, STATE - .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 + AESENC KEY STATE movaps 0x10(TKEYP), KEY - # aesenc KEY, STATE - .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 + AESENC KEY STATE movaps 0x20(TKEYP), KEY - # aesenc KEY, STATE - .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 + AESENC KEY STATE movaps 0x30(TKEYP), KEY - # aesenc KEY, STATE - .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 + AESENC KEY STATE movaps 0x40(TKEYP), KEY - # aesenc KEY, STATE - .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 + AESENC KEY STATE movaps 0x50(TKEYP), KEY - # aesenc KEY, STATE - .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 + AESENC KEY STATE movaps 0x60(TKEYP), KEY - # aesenc KEY, STATE - .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 + AESENC KEY STATE movaps 0x70(TKEYP), KEY - # aesenclast KEY, STATE # last round - .byte 0x66, 0x0f, 0x38, 0xdd, 0xc2 + AESENCLAST KEY STATE ret /* @@ -353,135 +308,79 @@ _aesni_enc4: je .L4enc192 add $0x20, TKEYP movaps -0x60(TKEYP), KEY - # aesenc KEY, STATE1 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 - # aesenc KEY, STATE2 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 - # aesenc KEY, STATE3 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xea - # aesenc KEY, STATE4 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2 + AESENC KEY STATE1 + AESENC KEY STATE2 + AESENC KEY STATE3 + AESENC KEY STATE4 movaps -0x50(TKEYP), KEY - # aesenc KEY, STATE1 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 - # aesenc KEY, STATE2 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 - # aesenc KEY, STATE3 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xea - # aesenc KEY, STATE4 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2 + AESENC KEY STATE1 + AESENC KEY STATE2 + AESENC KEY STATE3 + AESENC KEY STATE4 #.align 4 .L4enc192: movaps -0x40(TKEYP), KEY - # aesenc KEY, STATE1 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 - # aesenc KEY, STATE2 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 - # aesenc KEY, STATE3 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xea - # aesenc KEY, STATE4 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2 + AESENC KEY STATE1 + AESENC KEY STATE2 + AESENC KEY STATE3 + AESENC KEY STATE4 movaps -0x30(TKEYP), KEY - # aesenc KEY, STATE1 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 - # aesenc KEY, STATE2 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 - # aesenc KEY, STATE3 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xea - # aesenc KEY, STATE4 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2 + AESENC KEY STATE1 + AESENC KEY STATE2 + AESENC KEY STATE3 + AESENC KEY STATE4 #.align 4 .L4enc128: movaps -0x20(TKEYP), KEY - # aesenc KEY, STATE1 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 - # aesenc KEY, STATE2 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 - # aesenc KEY, STATE3 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xea - # aesenc KEY, STATE4 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2 + AESENC KEY STATE1 + AESENC KEY STATE2 + AESENC KEY STATE3 + AESENC KEY STATE4 movaps -0x10(TKEYP), KEY - # aesenc KEY, STATE1 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 - # aesenc KEY, STATE2 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 - # aesenc KEY, STATE3 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xea - # aesenc KEY, STATE4 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2 + AESENC KEY STATE1 + AESENC KEY STATE2 + AESENC KEY STATE3 + AESENC KEY STATE4 movaps (TKEYP), KEY - # aesenc KEY, STATE1 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 - # aesenc KEY, STATE2 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 - # aesenc KEY, STATE3 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xea - # aesenc KEY, STATE4 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2 + AESENC KEY STATE1 + AESENC KEY STATE2 + AESENC KEY STATE3 + AESENC KEY STATE4 movaps 0x10(TKEYP), KEY - # aesenc KEY, STATE1 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 - # aesenc KEY, STATE2 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 - # aesenc KEY, STATE3 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xea - # aesenc KEY, STATE4 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2 + AESENC KEY STATE1 + AESENC KEY STATE2 + AESENC KEY STATE3 + AESENC KEY STATE4 movaps 0x20(TKEYP), KEY - # aesenc KEY, STATE1 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 - # aesenc KEY, STATE2 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 - # aesenc KEY, STATE3 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xea - # aesenc KEY, STATE4 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2 + AESENC KEY STATE1 + AESENC KEY STATE2 + AESENC KEY STATE3 + AESENC KEY STATE4 movaps 0x30(TKEYP), KEY - # aesenc KEY, STATE1 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 - # aesenc KEY, STATE2 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 - # aesenc KEY, STATE3 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xea - # aesenc KEY, STATE4 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2 + AESENC KEY STATE1 + AESENC KEY STATE2 + AESENC KEY STATE3 + AESENC KEY STATE4 movaps 0x40(TKEYP), KEY - # aesenc KEY, STATE1 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 - # aesenc KEY, STATE2 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 - # aesenc KEY, STATE3 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xea - # aesenc KEY, STATE4 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2 + AESENC KEY STATE1 + AESENC KEY STATE2 + AESENC KEY STATE3 + AESENC KEY STATE4 movaps 0x50(TKEYP), KEY - # aesenc KEY, STATE1 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 - # aesenc KEY, STATE2 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 - # aesenc KEY, STATE3 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xea - # aesenc KEY, STATE4 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2 + AESENC KEY STATE1 + AESENC KEY STATE2 + AESENC KEY STATE3 + AESENC KEY STATE4 movaps 0x60(TKEYP), KEY - # aesenc KEY, STATE1 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 - # aesenc KEY, STATE2 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 - # aesenc KEY, STATE3 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xea - # aesenc KEY, STATE4 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2 + AESENC KEY STATE1 + AESENC KEY STATE2 + AESENC KEY STATE3 + AESENC KEY STATE4 movaps 0x70(TKEYP), KEY - # aesenclast KEY, STATE1 # last round - .byte 0x66, 0x0f, 0x38, 0xdd, 0xc2 - # aesenclast KEY, STATE2 - .byte 0x66, 0x0f, 0x38, 0xdd, 0xe2 - # aesenclast KEY, STATE3 - .byte 0x66, 0x0f, 0x38, 0xdd, 0xea - # aesenclast KEY, STATE4 - .byte 0x66, 0x0f, 0x38, 0xdd, 0xf2 + AESENCLAST KEY STATE1 # last round + AESENCLAST KEY STATE2 + AESENCLAST KEY STATE3 + AESENCLAST KEY STATE4 ret /* @@ -518,51 +417,37 @@ _aesni_dec1: je .Ldec192 add $0x20, TKEYP movaps -0x60(TKEYP), KEY - # aesdec KEY, STATE - .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 + AESDEC KEY STATE movaps -0x50(TKEYP), KEY - # aesdec KEY, STATE - .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 + AESDEC KEY STATE .align 4 .Ldec192: movaps -0x40(TKEYP), KEY - # aesdec KEY, STATE - .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 + AESDEC KEY STATE movaps -0x30(TKEYP), KEY - # aesdec KEY, STATE - .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 + AESDEC KEY STATE .align 4 .Ldec128: movaps -0x20(TKEYP), KEY - # aesdec KEY, STATE - .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 + AESDEC KEY STATE movaps -0x10(TKEYP), KEY - # aesdec KEY, STATE - .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 + AESDEC KEY STATE movaps (TKEYP), KEY - # aesdec KEY, STATE - .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 + AESDEC KEY STATE movaps 0x10(TKEYP), KEY - # aesdec KEY, STATE - .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 + AESDEC KEY STATE movaps 0x20(TKEYP), KEY - # aesdec KEY, STATE - .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 + AESDEC KEY STATE movaps 0x30(TKEYP), KEY - # aesdec KEY, STATE - .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 + AESDEC KEY STATE movaps 0x40(TKEYP), KEY - # aesdec KEY, STATE - .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 + AESDEC KEY STATE movaps 0x50(TKEYP), KEY - # aesdec KEY, STATE - .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 + AESDEC KEY STATE movaps 0x60(TKEYP), KEY - # aesdec KEY, STATE - .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 + AESDEC KEY STATE movaps 0x70(TKEYP), KEY - # aesdeclast KEY, STATE # last round - .byte 0x66, 0x0f, 0x38, 0xdf, 0xc2 + AESDECLAST KEY STATE ret /* @@ -597,135 +482,79 @@ _aesni_dec4: je .L4dec192 add $0x20, TKEYP movaps -0x60(TKEYP), KEY - # aesdec KEY, STATE1 - .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 - # aesdec KEY, STATE2 - .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 - # aesdec KEY, STATE3 - .byte 0x66, 0x0f, 0x38, 0xde, 0xea - # aesdec KEY, STATE4 - .byte 0x66, 0x0f, 0x38, 0xde, 0xf2 + AESDEC KEY STATE1 + AESDEC KEY STATE2 + AESDEC KEY STATE3 + AESDEC KEY STATE4 movaps -0x50(TKEYP), KEY - # aesdec KEY, STATE1 - .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 - # aesdec KEY, STATE2 - .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 - # aesdec KEY, STATE3 - .byte 0x66, 0x0f, 0x38, 0xde, 0xea - # aesdec KEY, STATE4 - .byte 0x66, 0x0f, 0x38, 0xde, 0xf2 + AESDEC KEY STATE1 + AESDEC KEY STATE2 + AESDEC KEY STATE3 + AESDEC KEY STATE4 .align 4 .L4dec192: movaps -0x40(TKEYP), KEY - # aesdec KEY, STATE1 - .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 - # aesdec KEY, STATE2 - .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 - # aesdec KEY, STATE3 - .byte 0x66, 0x0f, 0x38, 0xde, 0xea - # aesdec KEY, STATE4 - .byte 0x66, 0x0f, 0x38, 0xde, 0xf2 + AESDEC KEY STATE1 + AESDEC KEY STATE2 + AESDEC KEY STATE3 + AESDEC KEY STATE4 movaps -0x30(TKEYP), KEY - # aesdec KEY, STATE1 - .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 - # aesdec KEY, STATE2 - .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 - # aesdec KEY, STATE3 - .byte 0x66, 0x0f, 0x38, 0xde, 0xea - # aesdec KEY, STATE4 - .byte 0x66, 0x0f, 0x38, 0xde, 0xf2 + AESDEC KEY STATE1 + AESDEC KEY STATE2 + AESDEC KEY STATE3 + AESDEC KEY STATE4 .align 4 .L4dec128: movaps -0x20(TKEYP), KEY - # aesdec KEY, STATE1 - .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 - # aesdec KEY, STATE2 - .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 - # aesdec KEY, STATE3 - .byte 0x66, 0x0f, 0x38, 0xde, 0xea - # aesdec KEY, STATE4 - .byte 0x66, 0x0f, 0x38, 0xde, 0xf2 + AESDEC KEY STATE1 + AESDEC KEY STATE2 + AESDEC KEY STATE3 + AESDEC KEY STATE4 movaps -0x10(TKEYP), KEY - # aesdec KEY, STATE1 - .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 - # aesdec KEY, STATE2 - .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 - # aesdec KEY, STATE3 - .byte 0x66, 0x0f, 0x38, 0xde, 0xea - # aesdec KEY, STATE4 - .byte 0x66, 0x0f, 0x38, 0xde, 0xf2 + AESDEC KEY STATE1 + AESDEC KEY STATE2 + AESDEC KEY STATE3 + AESDEC KEY STATE4 movaps (TKEYP), KEY - # aesdec KEY, STATE1 - .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 - # aesdec KEY, STATE2 - .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 - # aesdec KEY, STATE3 - .byte 0x66, 0x0f, 0x38, 0xde, 0xea - # aesdec KEY, STATE4 - .byte 0x66, 0x0f, 0x38, 0xde, 0xf2 + AESDEC KEY STATE1 + AESDEC KEY STATE2 + AESDEC KEY STATE3 + AESDEC KEY STATE4 movaps 0x10(TKEYP), KEY - # aesdec KEY, STATE1 - .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 - # aesdec KEY, STATE2 - .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 - # aesdec KEY, STATE3 - .byte 0x66, 0x0f, 0x38, 0xde, 0xea - # aesdec KEY, STATE4 - .byte 0x66, 0x0f, 0x38, 0xde, 0xf2 + AESDEC KEY STATE1 + AESDEC KEY STATE2 + AESDEC KEY STATE3 + AESDEC KEY STATE4 movaps 0x20(TKEYP), KEY - # aesdec KEY, STATE1 - .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 - # aesdec KEY, STATE2 - .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 - # aesdec KEY, STATE3 - .byte 0x66, 0x0f, 0x38, 0xde, 0xea - # aesdec KEY, STATE4 - .byte 0x66, 0x0f, 0x38, 0xde, 0xf2 + AESDEC KEY STATE1 + AESDEC KEY STATE2 + AESDEC KEY STATE3 + AESDEC KEY STATE4 movaps 0x30(TKEYP), KEY - # aesdec KEY, STATE1 - .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 - # aesdec KEY, STATE2 - .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 - # aesdec KEY, STATE3 - .byte 0x66, 0x0f, 0x38, 0xde, 0xea - # aesdec KEY, STATE4 - .byte 0x66, 0x0f, 0x38, 0xde, 0xf2 + AESDEC KEY STATE1 + AESDEC KEY STATE2 + AESDEC KEY STATE3 + AESDEC KEY STATE4 movaps 0x40(TKEYP), KEY - # aesdec KEY, STATE1 - .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 - # aesdec KEY, STATE2 - .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 - # aesdec KEY, STATE3 - .byte 0x66, 0x0f, 0x38, 0xde, 0xea - # aesdec KEY, STATE4 - .byte 0x66, 0x0f, 0x38, 0xde, 0xf2 + AESDEC KEY STATE1 + AESDEC KEY STATE2 + AESDEC KEY STATE3 + AESDEC KEY STATE4 movaps 0x50(TKEYP), KEY - # aesdec KEY, STATE1 - .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 - # aesdec KEY, STATE2 - .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 - # aesdec KEY, STATE3 - .byte 0x66, 0x0f, 0x38, 0xde, 0xea - # aesdec KEY, STATE4 - .byte 0x66, 0x0f, 0x38, 0xde, 0xf2 + AESDEC KEY STATE1 + AESDEC KEY STATE2 + AESDEC KEY STATE3 + AESDEC KEY STATE4 movaps 0x60(TKEYP), KEY - # aesdec KEY, STATE1 - .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 - # aesdec KEY, STATE2 - .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 - # aesdec KEY, STATE3 - .byte 0x66, 0x0f, 0x38, 0xde, 0xea - # aesdec KEY, STATE4 - .byte 0x66, 0x0f, 0x38, 0xde, 0xf2 + AESDEC KEY STATE1 + AESDEC KEY STATE2 + AESDEC KEY STATE3 + AESDEC KEY STATE4 movaps 0x70(TKEYP), KEY - # aesdeclast KEY, STATE1 # last round - .byte 0x66, 0x0f, 0x38, 0xdf, 0xc2 - # aesdeclast KEY, STATE2 - .byte 0x66, 0x0f, 0x38, 0xdf, 0xe2 - # aesdeclast KEY, STATE3 - .byte 0x66, 0x0f, 0x38, 0xdf, 0xea - # aesdeclast KEY, STATE4 - .byte 0x66, 0x0f, 0x38, 0xdf, 0xf2 + AESDECLAST KEY STATE1 # last round + AESDECLAST KEY STATE2 + AESDECLAST KEY STATE3 + AESDECLAST KEY STATE4 ret /* diff --git a/arch/x86/crypto/ghash-clmulni-intel_asm.S b/arch/x86/crypto/ghash-clmulni-intel_asm.S new file mode 100644 index 00000000000..1eb7f90cb7b --- /dev/null +++ b/arch/x86/crypto/ghash-clmulni-intel_asm.S @@ -0,0 +1,157 @@ +/* + * Accelerated GHASH implementation with Intel PCLMULQDQ-NI + * instructions. This file contains accelerated part of ghash + * implementation. More information about PCLMULQDQ can be found at: + * + * http://software.intel.com/en-us/articles/carry-less-multiplication-and-its-usage-for-computing-the-gcm-mode/ + * + * Copyright (c) 2009 Intel Corp. + * Author: Huang Ying + * Vinodh Gopal + * Erdinc Ozturk + * Deniz Karakoyunlu + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + */ + +#include +#include + +.data + +.align 16 +.Lbswap_mask: + .octa 0x000102030405060708090a0b0c0d0e0f +.Lpoly: + .octa 0xc2000000000000000000000000000001 +.Ltwo_one: + .octa 0x00000001000000000000000000000001 + +#define DATA %xmm0 +#define SHASH %xmm1 +#define T1 %xmm2 +#define T2 %xmm3 +#define T3 %xmm4 +#define BSWAP %xmm5 +#define IN1 %xmm6 + +.text + +/* + * __clmul_gf128mul_ble: internal ABI + * input: + * DATA: operand1 + * SHASH: operand2, hash_key << 1 mod poly + * output: + * DATA: operand1 * operand2 mod poly + * changed: + * T1 + * T2 + * T3 + */ +__clmul_gf128mul_ble: + movaps DATA, T1 + pshufd $0b01001110, DATA, T2 + pshufd $0b01001110, SHASH, T3 + pxor DATA, T2 + pxor SHASH, T3 + + PCLMULQDQ 0x00 SHASH DATA # DATA = a0 * b0 + PCLMULQDQ 0x11 SHASH T1 # T1 = a1 * b1 + PCLMULQDQ 0x00 T3 T2 # T2 = (a1 + a0) * (b1 + b0) + pxor DATA, T2 + pxor T1, T2 # T2 = a0 * b1 + a1 * b0 + + movaps T2, T3 + pslldq $8, T3 + psrldq $8, T2 + pxor T3, DATA + pxor T2, T1 # is result of + # carry-less multiplication + + # first phase of the reduction + movaps DATA, T3 + psllq $1, T3 + pxor DATA, T3 + psllq $5, T3 + pxor DATA, T3 + psllq $57, T3 + movaps T3, T2 + pslldq $8, T2 + psrldq $8, T3 + pxor T2, DATA + pxor T3, T1 + + # second phase of the reduction + movaps DATA, T2 + psrlq $5, T2 + pxor DATA, T2 + psrlq $1, T2 + pxor DATA, T2 + psrlq $1, T2 + pxor T2, T1 + pxor T1, DATA + ret + +/* void clmul_ghash_mul(char *dst, const be128 *shash) */ +ENTRY(clmul_ghash_mul) + movups (%rdi), DATA + movups (%rsi), SHASH + movaps .Lbswap_mask, BSWAP + PSHUFB_XMM BSWAP DATA + call __clmul_gf128mul_ble + PSHUFB_XMM BSWAP DATA + movups DATA, (%rdi) + ret + +/* + * void clmul_ghash_update(char *dst, const char *src, unsigned int srclen, + * const be128 *shash); + */ +ENTRY(clmul_ghash_update) + cmp $16, %rdx + jb .Lupdate_just_ret # check length + movaps .Lbswap_mask, BSWAP + movups (%rdi), DATA + movups (%rcx), SHASH + PSHUFB_XMM BSWAP DATA +.align 4 +.Lupdate_loop: + movups (%rsi), IN1 + PSHUFB_XMM BSWAP IN1 + pxor IN1, DATA + call __clmul_gf128mul_ble + sub $16, %rdx + add $16, %rsi + cmp $16, %rdx + jge .Lupdate_loop + PSHUFB_XMM BSWAP DATA + movups DATA, (%rdi) +.Lupdate_just_ret: + ret + +/* + * void clmul_ghash_setkey(be128 *shash, const u8 *key); + * + * Calculate hash_key << 1 mod poly + */ +ENTRY(clmul_ghash_setkey) + movaps .Lbswap_mask, BSWAP + movups (%rsi), %xmm0 + PSHUFB_XMM BSWAP %xmm0 + movaps %xmm0, %xmm1 + psllq $1, %xmm0 + psrlq $63, %xmm1 + movaps %xmm1, %xmm2 + pslldq $8, %xmm1 + psrldq $8, %xmm2 + por %xmm1, %xmm0 + # reduction + pshufd $0b00100100, %xmm2, %xmm1 + pcmpeqd .Ltwo_one, %xmm1 + pand .Lpoly, %xmm1 + pxor %xmm1, %xmm0 + movups %xmm0, (%rdi) + ret diff --git a/arch/x86/crypto/ghash-clmulni-intel_glue.c b/arch/x86/crypto/ghash-clmulni-intel_glue.c new file mode 100644 index 00000000000..cbcc8d8ea93 --- /dev/null +++ b/arch/x86/crypto/ghash-clmulni-intel_glue.c @@ -0,0 +1,333 @@ +/* + * Accelerated GHASH implementation with Intel PCLMULQDQ-NI + * instructions. This file contains glue code. + * + * Copyright (c) 2009 Intel Corp. + * Author: Huang Ying + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define GHASH_BLOCK_SIZE 16 +#define GHASH_DIGEST_SIZE 16 + +void clmul_ghash_mul(char *dst, const be128 *shash); + +void clmul_ghash_update(char *dst, const char *src, unsigned int srclen, + const be128 *shash); + +void clmul_ghash_setkey(be128 *shash, const u8 *key); + +struct ghash_async_ctx { + struct cryptd_ahash *cryptd_tfm; +}; + +struct ghash_ctx { + be128 shash; +}; + +struct ghash_desc_ctx { + u8 buffer[GHASH_BLOCK_SIZE]; + u32 bytes; +}; + +static int ghash_init(struct shash_desc *desc) +{ + struct ghash_desc_ctx *dctx = shash_desc_ctx(desc); + + memset(dctx, 0, sizeof(*dctx)); + + return 0; +} + +static int ghash_setkey(struct crypto_shash *tfm, + const u8 *key, unsigned int keylen) +{ + struct ghash_ctx *ctx = crypto_shash_ctx(tfm); + + if (keylen != GHASH_BLOCK_SIZE) { + crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } + + clmul_ghash_setkey(&ctx->shash, key); + + return 0; +} + +static int ghash_update(struct shash_desc *desc, + const u8 *src, unsigned int srclen) +{ + struct ghash_desc_ctx *dctx = shash_desc_ctx(desc); + struct ghash_ctx *ctx = crypto_shash_ctx(desc->tfm); + u8 *dst = dctx->buffer; + + kernel_fpu_begin(); + if (dctx->bytes) { + int n = min(srclen, dctx->bytes); + u8 *pos = dst + (GHASH_BLOCK_SIZE - dctx->bytes); + + dctx->bytes -= n; + srclen -= n; + + while (n--) + *pos++ ^= *src++; + + if (!dctx->bytes) + clmul_ghash_mul(dst, &ctx->shash); + } + + clmul_ghash_update(dst, src, srclen, &ctx->shash); + kernel_fpu_end(); + + if (srclen & 0xf) { + src += srclen - (srclen & 0xf); + srclen &= 0xf; + dctx->bytes = GHASH_BLOCK_SIZE - srclen; + while (srclen--) + *dst++ ^= *src++; + } + + return 0; +} + +static void ghash_flush(struct ghash_ctx *ctx, struct ghash_desc_ctx *dctx) +{ + u8 *dst = dctx->buffer; + + if (dctx->bytes) { + u8 *tmp = dst + (GHASH_BLOCK_SIZE - dctx->bytes); + + while (dctx->bytes--) + *tmp++ ^= 0; + + kernel_fpu_begin(); + clmul_ghash_mul(dst, &ctx->shash); + kernel_fpu_end(); + } + + dctx->bytes = 0; +} + +static int ghash_final(struct shash_desc *desc, u8 *dst) +{ + struct ghash_desc_ctx *dctx = shash_desc_ctx(desc); + struct ghash_ctx *ctx = crypto_shash_ctx(desc->tfm); + u8 *buf = dctx->buffer; + + ghash_flush(ctx, dctx); + memcpy(dst, buf, GHASH_BLOCK_SIZE); + + return 0; +} + +static struct shash_alg ghash_alg = { + .digestsize = GHASH_DIGEST_SIZE, + .init = ghash_init, + .update = ghash_update, + .final = ghash_final, + .setkey = ghash_setkey, + .descsize = sizeof(struct ghash_desc_ctx), + .base = { + .cra_name = "__ghash", + .cra_driver_name = "__ghash-pclmulqdqni", + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = GHASH_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct ghash_ctx), + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(ghash_alg.base.cra_list), + }, +}; + +static int ghash_async_init(struct ahash_request *req) +{ + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm); + struct ahash_request *cryptd_req = ahash_request_ctx(req); + struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm; + + if (!irq_fpu_usable()) { + memcpy(cryptd_req, req, sizeof(*req)); + ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base); + return crypto_ahash_init(cryptd_req); + } else { + struct shash_desc *desc = cryptd_shash_desc(cryptd_req); + struct crypto_shash *child = cryptd_ahash_child(cryptd_tfm); + + desc->tfm = child; + desc->flags = req->base.flags; + return crypto_shash_init(desc); + } +} + +static int ghash_async_update(struct ahash_request *req) +{ + struct ahash_request *cryptd_req = ahash_request_ctx(req); + + if (!irq_fpu_usable()) { + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm); + struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm; + + memcpy(cryptd_req, req, sizeof(*req)); + ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base); + return crypto_ahash_update(cryptd_req); + } else { + struct shash_desc *desc = cryptd_shash_desc(cryptd_req); + return shash_ahash_update(req, desc); + } +} + +static int ghash_async_final(struct ahash_request *req) +{ + struct ahash_request *cryptd_req = ahash_request_ctx(req); + + if (!irq_fpu_usable()) { + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm); + struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm; + + memcpy(cryptd_req, req, sizeof(*req)); + ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base); + return crypto_ahash_final(cryptd_req); + } else { + struct shash_desc *desc = cryptd_shash_desc(cryptd_req); + return crypto_shash_final(desc, req->result); + } +} + +static int ghash_async_digest(struct ahash_request *req) +{ + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm); + struct ahash_request *cryptd_req = ahash_request_ctx(req); + struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm; + + if (!irq_fpu_usable()) { + memcpy(cryptd_req, req, sizeof(*req)); + ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base); + return crypto_ahash_digest(cryptd_req); + } else { + struct shash_desc *desc = cryptd_shash_desc(cryptd_req); + struct crypto_shash *child = cryptd_ahash_child(cryptd_tfm); + + desc->tfm = child; + desc->flags = req->base.flags; + return shash_ahash_digest(req, desc); + } +} + +static int ghash_async_setkey(struct crypto_ahash *tfm, const u8 *key, + unsigned int keylen) +{ + struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm); + struct crypto_ahash *child = &ctx->cryptd_tfm->base; + int err; + + crypto_ahash_clear_flags(child, CRYPTO_TFM_REQ_MASK); + crypto_ahash_set_flags(child, crypto_ahash_get_flags(tfm) + & CRYPTO_TFM_REQ_MASK); + err = crypto_ahash_setkey(child, key, keylen); + crypto_ahash_set_flags(tfm, crypto_ahash_get_flags(child) + & CRYPTO_TFM_RES_MASK); + + return 0; +} + +static int ghash_async_init_tfm(struct crypto_tfm *tfm) +{ + struct cryptd_ahash *cryptd_tfm; + struct ghash_async_ctx *ctx = crypto_tfm_ctx(tfm); + + cryptd_tfm = cryptd_alloc_ahash("__ghash-pclmulqdqni", 0, 0); + if (IS_ERR(cryptd_tfm)) + return PTR_ERR(cryptd_tfm); + ctx->cryptd_tfm = cryptd_tfm; + crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), + sizeof(struct ahash_request) + + crypto_ahash_reqsize(&cryptd_tfm->base)); + + return 0; +} + +static void ghash_async_exit_tfm(struct crypto_tfm *tfm) +{ + struct ghash_async_ctx *ctx = crypto_tfm_ctx(tfm); + + cryptd_free_ahash(ctx->cryptd_tfm); +} + +static struct ahash_alg ghash_async_alg = { + .init = ghash_async_init, + .update = ghash_async_update, + .final = ghash_async_final, + .setkey = ghash_async_setkey, + .digest = ghash_async_digest, + .halg = { + .digestsize = GHASH_DIGEST_SIZE, + .base = { + .cra_name = "ghash", + .cra_driver_name = "ghash-clmulni", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC, + .cra_blocksize = GHASH_BLOCK_SIZE, + .cra_type = &crypto_ahash_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(ghash_async_alg.halg.base.cra_list), + .cra_init = ghash_async_init_tfm, + .cra_exit = ghash_async_exit_tfm, + }, + }, +}; + +static int __init ghash_pclmulqdqni_mod_init(void) +{ + int err; + + if (!cpu_has_pclmulqdq) { + printk(KERN_INFO "Intel PCLMULQDQ-NI instructions are not" + " detected.\n"); + return -ENODEV; + } + + err = crypto_register_shash(&ghash_alg); + if (err) + goto err_out; + err = crypto_register_ahash(&ghash_async_alg); + if (err) + goto err_shash; + + return 0; + +err_shash: + crypto_unregister_shash(&ghash_alg); +err_out: + return err; +} + +static void __exit ghash_pclmulqdqni_mod_exit(void) +{ + crypto_unregister_ahash(&ghash_async_alg); + crypto_unregister_shash(&ghash_alg); +} + +module_init(ghash_pclmulqdqni_mod_init); +module_exit(ghash_pclmulqdqni_mod_exit); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("GHASH Message Digest Algorithm, " + "acclerated by PCLMULQDQ-NI"); +MODULE_ALIAS("ghash"); diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 9cfc88b9774..613700f27a4 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -248,6 +248,7 @@ extern const char * const x86_power_flags[32]; #define cpu_has_x2apic boot_cpu_has(X86_FEATURE_X2APIC) #define cpu_has_xsave boot_cpu_has(X86_FEATURE_XSAVE) #define cpu_has_hypervisor boot_cpu_has(X86_FEATURE_HYPERVISOR) +#define cpu_has_pclmulqdq boot_cpu_has(X86_FEATURE_PCLMULQDQ) #if defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_64) # define cpu_has_invlpg 1 diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h index 0b20bbb758f..ebfb8a9e11f 100644 --- a/arch/x86/include/asm/i387.h +++ b/arch/x86/include/asm/i387.h @@ -10,6 +10,8 @@ #ifndef _ASM_X86_I387_H #define _ASM_X86_I387_H +#ifndef __ASSEMBLY__ + #include #include #include @@ -411,4 +413,9 @@ static inline unsigned short get_fpu_mxcsr(struct task_struct *tsk) } } +#endif /* __ASSEMBLY__ */ + +#define PSHUFB_XMM5_XMM0 .byte 0x66, 0x0f, 0x38, 0x00, 0xc5 +#define PSHUFB_XMM5_XMM6 .byte 0x66, 0x0f, 0x38, 0x00, 0xf5 + #endif /* _ASM_X86_I387_H */ diff --git a/arch/x86/include/asm/inst.h b/arch/x86/include/asm/inst.h new file mode 100644 index 00000000000..14cf526091f --- /dev/null +++ b/arch/x86/include/asm/inst.h @@ -0,0 +1,150 @@ +/* + * Generate .byte code for some instructions not supported by old + * binutils. + */ +#ifndef X86_ASM_INST_H +#define X86_ASM_INST_H + +#ifdef __ASSEMBLY__ + + .macro XMM_NUM opd xmm + .ifc \xmm,%xmm0 + \opd = 0 + .endif + .ifc \xmm,%xmm1 + \opd = 1 + .endif + .ifc \xmm,%xmm2 + \opd = 2 + .endif + .ifc \xmm,%xmm3 + \opd = 3 + .endif + .ifc \xmm,%xmm4 + \opd = 4 + .endif + .ifc \xmm,%xmm5 + \opd = 5 + .endif + .ifc \xmm,%xmm6 + \opd = 6 + .endif + .ifc \xmm,%xmm7 + \opd = 7 + .endif + .ifc \xmm,%xmm8 + \opd = 8 + .endif + .ifc \xmm,%xmm9 + \opd = 9 + .endif + .ifc \xmm,%xmm10 + \opd = 10 + .endif + .ifc \xmm,%xmm11 + \opd = 11 + .endif + .ifc \xmm,%xmm12 + \opd = 12 + .endif + .ifc \xmm,%xmm13 + \opd = 13 + .endif + .ifc \xmm,%xmm14 + \opd = 14 + .endif + .ifc \xmm,%xmm15 + \opd = 15 + .endif + .endm + + .macro PFX_OPD_SIZE + .byte 0x66 + .endm + + .macro PFX_REX opd1 opd2 + .if (\opd1 | \opd2) & 8 + .byte 0x40 | ((\opd1 & 8) >> 3) | ((\opd2 & 8) >> 1) + .endif + .endm + + .macro MODRM mod opd1 opd2 + .byte \mod | (\opd1 & 7) | ((\opd2 & 7) << 3) + .endm + + .macro PSHUFB_XMM xmm1 xmm2 + XMM_NUM pshufb_opd1 \xmm1 + XMM_NUM pshufb_opd2 \xmm2 + PFX_OPD_SIZE + PFX_REX pshufb_opd1 pshufb_opd2 + .byte 0x0f, 0x38, 0x00 + MODRM 0xc0 pshufb_opd1 pshufb_opd2 + .endm + + .macro PCLMULQDQ imm8 xmm1 xmm2 + XMM_NUM clmul_opd1 \xmm1 + XMM_NUM clmul_opd2 \xmm2 + PFX_OPD_SIZE + PFX_REX clmul_opd1 clmul_opd2 + .byte 0x0f, 0x3a, 0x44 + MODRM 0xc0 clmul_opd1 clmul_opd2 + .byte \imm8 + .endm + + .macro AESKEYGENASSIST rcon xmm1 xmm2 + XMM_NUM aeskeygen_opd1 \xmm1 + XMM_NUM aeskeygen_opd2 \xmm2 + PFX_OPD_SIZE + PFX_REX aeskeygen_opd1 aeskeygen_opd2 + .byte 0x0f, 0x3a, 0xdf + MODRM 0xc0 aeskeygen_opd1 aeskeygen_opd2 + .byte \rcon + .endm + + .macro AESIMC xmm1 xmm2 + XMM_NUM aesimc_opd1 \xmm1 + XMM_NUM aesimc_opd2 \xmm2 + PFX_OPD_SIZE + PFX_REX aesimc_opd1 aesimc_opd2 + .byte 0x0f, 0x38, 0xdb + MODRM 0xc0 aesimc_opd1 aesimc_opd2 + .endm + + .macro AESENC xmm1 xmm2 + XMM_NUM aesenc_opd1 \xmm1 + XMM_NUM aesenc_opd2 \xmm2 + PFX_OPD_SIZE + PFX_REX aesenc_opd1 aesenc_opd2 + .byte 0x0f, 0x38, 0xdc + MODRM 0xc0 aesenc_opd1 aesenc_opd2 + .endm + + .macro AESENCLAST xmm1 xmm2 + XMM_NUM aesenclast_opd1 \xmm1 + XMM_NUM aesenclast_opd2 \xmm2 + PFX_OPD_SIZE + PFX_REX aesenclast_opd1 aesenclast_opd2 + .byte 0x0f, 0x38, 0xdd + MODRM 0xc0 aesenclast_opd1 aesenclast_opd2 + .endm + + .macro AESDEC xmm1 xmm2 + XMM_NUM aesdec_opd1 \xmm1 + XMM_NUM aesdec_opd2 \xmm2 + PFX_OPD_SIZE + PFX_REX aesdec_opd1 aesdec_opd2 + .byte 0x0f, 0x38, 0xde + MODRM 0xc0 aesdec_opd1 aesdec_opd2 + .endm + + .macro AESDECLAST xmm1 xmm2 + XMM_NUM aesdeclast_opd1 \xmm1 + XMM_NUM aesdeclast_opd2 \xmm2 + PFX_OPD_SIZE + PFX_REX aesdeclast_opd1 aesdeclast_opd2 + .byte 0x0f, 0x38, 0xdf + MODRM 0xc0 aesdeclast_opd1 aesdeclast_opd2 + .endm +#endif + +#endif diff --git a/crypto/Kconfig b/crypto/Kconfig index 26b5dd0cb56..81c185a6971 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -440,6 +440,15 @@ config CRYPTO_WP512 See also: +config CRYPTO_GHASH_CLMUL_NI_INTEL + tristate "GHASH digest algorithm (CLMUL-NI accelerated)" + depends on (X86 || UML_X86) && 64BIT + select CRYPTO_SHASH + select CRYPTO_CRYPTD + help + GHASH is message digest algorithm for GCM (Galois/Counter Mode). + The implementation is accelerated by CLMUL-NI of Intel. + comment "Ciphers" config CRYPTO_AES diff --git a/crypto/ansi_cprng.c b/crypto/ansi_cprng.c index 3aa6e3834bf..2bc33214284 100644 --- a/crypto/ansi_cprng.c +++ b/crypto/ansi_cprng.c @@ -85,7 +85,7 @@ static void xor_vectors(unsigned char *in1, unsigned char *in2, * Returns DEFAULT_BLK_SZ bytes of random data per call * returns 0 if generation succeded, <0 if something went wrong */ -static int _get_more_prng_bytes(struct prng_context *ctx) +static int _get_more_prng_bytes(struct prng_context *ctx, int cont_test) { int i; unsigned char tmp[DEFAULT_BLK_SZ]; @@ -132,7 +132,7 @@ static int _get_more_prng_bytes(struct prng_context *ctx) */ if (!memcmp(ctx->rand_data, ctx->last_rand_data, DEFAULT_BLK_SZ)) { - if (fips_enabled) { + if (cont_test) { panic("cprng %p Failed repetition check!\n", ctx); } @@ -185,16 +185,14 @@ static int _get_more_prng_bytes(struct prng_context *ctx) } /* Our exported functions */ -static int get_prng_bytes(char *buf, size_t nbytes, struct prng_context *ctx) +static int get_prng_bytes(char *buf, size_t nbytes, struct prng_context *ctx, + int do_cont_test) { unsigned char *ptr = buf; unsigned int byte_count = (unsigned int)nbytes; int err; - if (nbytes < 0) - return -EINVAL; - spin_lock_bh(&ctx->prng_lock); err = -EINVAL; @@ -220,7 +218,7 @@ static int get_prng_bytes(char *buf, size_t nbytes, struct prng_context *ctx) remainder: if (ctx->rand_data_valid == DEFAULT_BLK_SZ) { - if (_get_more_prng_bytes(ctx) < 0) { + if (_get_more_prng_bytes(ctx, do_cont_test) < 0) { memset(buf, 0, nbytes); err = -EINVAL; goto done; @@ -247,7 +245,7 @@ empty_rbuf: */ for (; byte_count >= DEFAULT_BLK_SZ; byte_count -= DEFAULT_BLK_SZ) { if (ctx->rand_data_valid == DEFAULT_BLK_SZ) { - if (_get_more_prng_bytes(ctx) < 0) { + if (_get_more_prng_bytes(ctx, do_cont_test) < 0) { memset(buf, 0, nbytes); err = -EINVAL; goto done; @@ -356,7 +354,7 @@ static int cprng_get_random(struct crypto_rng *tfm, u8 *rdata, { struct prng_context *prng = crypto_rng_ctx(tfm); - return get_prng_bytes(rdata, dlen, prng); + return get_prng_bytes(rdata, dlen, prng, 0); } /* @@ -404,19 +402,79 @@ static struct crypto_alg rng_alg = { } }; +#ifdef CONFIG_CRYPTO_FIPS +static int fips_cprng_get_random(struct crypto_rng *tfm, u8 *rdata, + unsigned int dlen) +{ + struct prng_context *prng = crypto_rng_ctx(tfm); + + return get_prng_bytes(rdata, dlen, prng, 1); +} + +static int fips_cprng_reset(struct crypto_rng *tfm, u8 *seed, unsigned int slen) +{ + u8 rdata[DEFAULT_BLK_SZ]; + int rc; + + struct prng_context *prng = crypto_rng_ctx(tfm); + + rc = cprng_reset(tfm, seed, slen); + + if (!rc) + goto out; + + /* this primes our continuity test */ + rc = get_prng_bytes(rdata, DEFAULT_BLK_SZ, prng, 0); + prng->rand_data_valid = DEFAULT_BLK_SZ; + +out: + return rc; +} + +static struct crypto_alg fips_rng_alg = { + .cra_name = "fips(ansi_cprng)", + .cra_driver_name = "fips_ansi_cprng", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_RNG, + .cra_ctxsize = sizeof(struct prng_context), + .cra_type = &crypto_rng_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(rng_alg.cra_list), + .cra_init = cprng_init, + .cra_exit = cprng_exit, + .cra_u = { + .rng = { + .rng_make_random = fips_cprng_get_random, + .rng_reset = fips_cprng_reset, + .seedsize = DEFAULT_PRNG_KSZ + 2*DEFAULT_BLK_SZ, + } + } +}; +#endif /* Module initalization */ static int __init prng_mod_init(void) { - if (fips_enabled) - rng_alg.cra_priority += 200; + int rc = 0; - return crypto_register_alg(&rng_alg); + rc = crypto_register_alg(&rng_alg); +#ifdef CONFIG_CRYPTO_FIPS + if (rc) + goto out; + + rc = crypto_register_alg(&fips_rng_alg); + +out: +#endif + return rc; } static void __exit prng_mod_fini(void) { crypto_unregister_alg(&rng_alg); +#ifdef CONFIG_CRYPTO_FIPS + crypto_unregister_alg(&fips_rng_alg); +#endif return; } diff --git a/crypto/cryptd.c b/crypto/cryptd.c index 35335825a4e..f8ae0d94a64 100644 --- a/crypto/cryptd.c +++ b/crypto/cryptd.c @@ -711,6 +711,13 @@ struct crypto_shash *cryptd_ahash_child(struct cryptd_ahash *tfm) } EXPORT_SYMBOL_GPL(cryptd_ahash_child); +struct shash_desc *cryptd_shash_desc(struct ahash_request *req) +{ + struct cryptd_hash_request_ctx *rctx = ahash_request_ctx(req); + return &rctx->desc; +} +EXPORT_SYMBOL_GPL(cryptd_shash_desc); + void cryptd_free_ahash(struct cryptd_ahash *tfm) { crypto_free_ahash(&tfm->base); diff --git a/crypto/digest.c b/crypto/digest.c deleted file mode 100644 index 5d3f1303da9..00000000000 --- a/crypto/digest.c +++ /dev/null @@ -1,240 +0,0 @@ -/* - * Cryptographic API. - * - * Digest operations. - * - * Copyright (c) 2002 James Morris - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "internal.h" - -static int init(struct hash_desc *desc) -{ - struct crypto_tfm *tfm = crypto_hash_tfm(desc->tfm); - - tfm->__crt_alg->cra_digest.dia_init(tfm); - return 0; -} - -static int update2(struct hash_desc *desc, - struct scatterlist *sg, unsigned int nbytes) -{ - struct crypto_tfm *tfm = crypto_hash_tfm(desc->tfm); - unsigned int alignmask = crypto_tfm_alg_alignmask(tfm); - - if (!nbytes) - return 0; - - for (;;) { - struct page *pg = sg_page(sg); - unsigned int offset = sg->offset; - unsigned int l = sg->length; - - if (unlikely(l > nbytes)) - l = nbytes; - nbytes -= l; - - do { - unsigned int bytes_from_page = min(l, ((unsigned int) - (PAGE_SIZE)) - - offset); - char *src = crypto_kmap(pg, 0); - char *p = src + offset; - - if (unlikely(offset & alignmask)) { - unsigned int bytes = - alignmask + 1 - (offset & alignmask); - bytes = min(bytes, bytes_from_page); - tfm->__crt_alg->cra_digest.dia_update(tfm, p, - bytes); - p += bytes; - bytes_from_page -= bytes; - l -= bytes; - } - tfm->__crt_alg->cra_digest.dia_update(tfm, p, - bytes_from_page); - crypto_kunmap(src, 0); - crypto_yield(desc->flags); - offset = 0; - pg++; - l -= bytes_from_page; - } while (l > 0); - - if (!nbytes) - break; - sg = scatterwalk_sg_next(sg); - } - - return 0; -} - -static int update(struct hash_desc *desc, - struct scatterlist *sg, unsigned int nbytes) -{ - if (WARN_ON_ONCE(in_irq())) - return -EDEADLK; - return update2(desc, sg, nbytes); -} - -static int final(struct hash_desc *desc, u8 *out) -{ - struct crypto_tfm *tfm = crypto_hash_tfm(desc->tfm); - unsigned long alignmask = crypto_tfm_alg_alignmask(tfm); - struct digest_alg *digest = &tfm->__crt_alg->cra_digest; - - if (unlikely((unsigned long)out & alignmask)) { - unsigned long align = alignmask + 1; - unsigned long addr = (unsigned long)crypto_tfm_ctx(tfm); - u8 *dst = (u8 *)ALIGN(addr, align) + - ALIGN(tfm->__crt_alg->cra_ctxsize, align); - - digest->dia_final(tfm, dst); - memcpy(out, dst, digest->dia_digestsize); - } else - digest->dia_final(tfm, out); - - return 0; -} - -static int nosetkey(struct crypto_hash *tfm, const u8 *key, unsigned int keylen) -{ - crypto_hash_clear_flags(tfm, CRYPTO_TFM_RES_MASK); - return -ENOSYS; -} - -static int setkey(struct crypto_hash *hash, const u8 *key, unsigned int keylen) -{ - struct crypto_tfm *tfm = crypto_hash_tfm(hash); - - crypto_hash_clear_flags(hash, CRYPTO_TFM_RES_MASK); - return tfm->__crt_alg->cra_digest.dia_setkey(tfm, key, keylen); -} - -static int digest(struct hash_desc *desc, - struct scatterlist *sg, unsigned int nbytes, u8 *out) -{ - if (WARN_ON_ONCE(in_irq())) - return -EDEADLK; - - init(desc); - update2(desc, sg, nbytes); - return final(desc, out); -} - -int crypto_init_digest_ops(struct crypto_tfm *tfm) -{ - struct hash_tfm *ops = &tfm->crt_hash; - struct digest_alg *dalg = &tfm->__crt_alg->cra_digest; - - if (dalg->dia_digestsize > PAGE_SIZE / 8) - return -EINVAL; - - ops->init = init; - ops->update = update; - ops->final = final; - ops->digest = digest; - ops->setkey = dalg->dia_setkey ? setkey : nosetkey; - ops->digestsize = dalg->dia_digestsize; - - return 0; -} - -void crypto_exit_digest_ops(struct crypto_tfm *tfm) -{ -} - -static int digest_async_nosetkey(struct crypto_ahash *tfm_async, const u8 *key, - unsigned int keylen) -{ - crypto_ahash_clear_flags(tfm_async, CRYPTO_TFM_RES_MASK); - return -ENOSYS; -} - -static int digest_async_setkey(struct crypto_ahash *tfm_async, const u8 *key, - unsigned int keylen) -{ - struct crypto_tfm *tfm = crypto_ahash_tfm(tfm_async); - struct digest_alg *dalg = &tfm->__crt_alg->cra_digest; - - crypto_ahash_clear_flags(tfm_async, CRYPTO_TFM_RES_MASK); - return dalg->dia_setkey(tfm, key, keylen); -} - -static int digest_async_init(struct ahash_request *req) -{ - struct crypto_tfm *tfm = req->base.tfm; - struct digest_alg *dalg = &tfm->__crt_alg->cra_digest; - - dalg->dia_init(tfm); - return 0; -} - -static int digest_async_update(struct ahash_request *req) -{ - struct crypto_tfm *tfm = req->base.tfm; - struct hash_desc desc = { - .tfm = __crypto_hash_cast(tfm), - .flags = req->base.flags, - }; - - update(&desc, req->src, req->nbytes); - return 0; -} - -static int digest_async_final(struct ahash_request *req) -{ - struct crypto_tfm *tfm = req->base.tfm; - struct hash_desc desc = { - .tfm = __crypto_hash_cast(tfm), - .flags = req->base.flags, - }; - - final(&desc, req->result); - return 0; -} - -static int digest_async_digest(struct ahash_request *req) -{ - struct crypto_tfm *tfm = req->base.tfm; - struct hash_desc desc = { - .tfm = __crypto_hash_cast(tfm), - .flags = req->base.flags, - }; - - return digest(&desc, req->src, req->nbytes, req->result); -} - -int crypto_init_digest_ops_async(struct crypto_tfm *tfm) -{ - struct ahash_tfm *crt = &tfm->crt_ahash; - struct digest_alg *dalg = &tfm->__crt_alg->cra_digest; - - if (dalg->dia_digestsize > PAGE_SIZE / 8) - return -EINVAL; - - crt->init = digest_async_init; - crt->update = digest_async_update; - crt->final = digest_async_final; - crt->digest = digest_async_digest; - crt->setkey = dalg->dia_setkey ? digest_async_setkey : - digest_async_nosetkey; - crt->digestsize = dalg->dia_digestsize; - - return 0; -} diff --git a/crypto/hash.c b/crypto/hash.c deleted file mode 100644 index cb86b19fd10..00000000000 --- a/crypto/hash.c +++ /dev/null @@ -1,183 +0,0 @@ -/* - * Cryptographic Hash operations. - * - * Copyright (c) 2006 Herbert Xu - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. - */ - -#include -#include -#include -#include -#include -#include - -#include "internal.h" - -static unsigned int crypto_hash_ctxsize(struct crypto_alg *alg, u32 type, - u32 mask) -{ - return alg->cra_ctxsize; -} - -static int hash_setkey_unaligned(struct crypto_hash *crt, const u8 *key, - unsigned int keylen) -{ - struct crypto_tfm *tfm = crypto_hash_tfm(crt); - struct hash_alg *alg = &tfm->__crt_alg->cra_hash; - unsigned long alignmask = crypto_hash_alignmask(crt); - int ret; - u8 *buffer, *alignbuffer; - unsigned long absize; - - absize = keylen + alignmask; - buffer = kmalloc(absize, GFP_ATOMIC); - if (!buffer) - return -ENOMEM; - - alignbuffer = (u8 *)ALIGN((unsigned long)buffer, alignmask + 1); - memcpy(alignbuffer, key, keylen); - ret = alg->setkey(crt, alignbuffer, keylen); - memset(alignbuffer, 0, keylen); - kfree(buffer); - return ret; -} - -static int hash_setkey(struct crypto_hash *crt, const u8 *key, - unsigned int keylen) -{ - struct crypto_tfm *tfm = crypto_hash_tfm(crt); - struct hash_alg *alg = &tfm->__crt_alg->cra_hash; - unsigned long alignmask = crypto_hash_alignmask(crt); - - if ((unsigned long)key & alignmask) - return hash_setkey_unaligned(crt, key, keylen); - - return alg->setkey(crt, key, keylen); -} - -static int hash_async_setkey(struct crypto_ahash *tfm_async, const u8 *key, - unsigned int keylen) -{ - struct crypto_tfm *tfm = crypto_ahash_tfm(tfm_async); - struct crypto_hash *tfm_hash = __crypto_hash_cast(tfm); - struct hash_alg *alg = &tfm->__crt_alg->cra_hash; - - return alg->setkey(tfm_hash, key, keylen); -} - -static int hash_async_init(struct ahash_request *req) -{ - struct crypto_tfm *tfm = req->base.tfm; - struct hash_alg *alg = &tfm->__crt_alg->cra_hash; - struct hash_desc desc = { - .tfm = __crypto_hash_cast(tfm), - .flags = req->base.flags, - }; - - return alg->init(&desc); -} - -static int hash_async_update(struct ahash_request *req) -{ - struct crypto_tfm *tfm = req->base.tfm; - struct hash_alg *alg = &tfm->__crt_alg->cra_hash; - struct hash_desc desc = { - .tfm = __crypto_hash_cast(tfm), - .flags = req->base.flags, - }; - - return alg->update(&desc, req->src, req->nbytes); -} - -static int hash_async_final(struct ahash_request *req) -{ - struct crypto_tfm *tfm = req->base.tfm; - struct hash_alg *alg = &tfm->__crt_alg->cra_hash; - struct hash_desc desc = { - .tfm = __crypto_hash_cast(tfm), - .flags = req->base.flags, - }; - - return alg->final(&desc, req->result); -} - -static int hash_async_digest(struct ahash_request *req) -{ - struct crypto_tfm *tfm = req->base.tfm; - struct hash_alg *alg = &tfm->__crt_alg->cra_hash; - struct hash_desc desc = { - .tfm = __crypto_hash_cast(tfm), - .flags = req->base.flags, - }; - - return alg->digest(&desc, req->src, req->nbytes, req->result); -} - -static int crypto_init_hash_ops_async(struct crypto_tfm *tfm) -{ - struct ahash_tfm *crt = &tfm->crt_ahash; - struct hash_alg *alg = &tfm->__crt_alg->cra_hash; - - crt->init = hash_async_init; - crt->update = hash_async_update; - crt->final = hash_async_final; - crt->digest = hash_async_digest; - crt->setkey = hash_async_setkey; - crt->digestsize = alg->digestsize; - - return 0; -} - -static int crypto_init_hash_ops_sync(struct crypto_tfm *tfm) -{ - struct hash_tfm *crt = &tfm->crt_hash; - struct hash_alg *alg = &tfm->__crt_alg->cra_hash; - - crt->init = alg->init; - crt->update = alg->update; - crt->final = alg->final; - crt->digest = alg->digest; - crt->setkey = hash_setkey; - crt->digestsize = alg->digestsize; - - return 0; -} - -static int crypto_init_hash_ops(struct crypto_tfm *tfm, u32 type, u32 mask) -{ - struct hash_alg *alg = &tfm->__crt_alg->cra_hash; - - if (alg->digestsize > PAGE_SIZE / 8) - return -EINVAL; - - if ((mask & CRYPTO_ALG_TYPE_HASH_MASK) != CRYPTO_ALG_TYPE_HASH_MASK) - return crypto_init_hash_ops_async(tfm); - else - return crypto_init_hash_ops_sync(tfm); -} - -static void crypto_hash_show(struct seq_file *m, struct crypto_alg *alg) - __attribute__ ((unused)); -static void crypto_hash_show(struct seq_file *m, struct crypto_alg *alg) -{ - seq_printf(m, "type : hash\n"); - seq_printf(m, "blocksize : %u\n", alg->cra_blocksize); - seq_printf(m, "digestsize : %u\n", alg->cra_hash.digestsize); -} - -const struct crypto_type crypto_hash_type = { - .ctxsize = crypto_hash_ctxsize, - .init = crypto_init_hash_ops, -#ifdef CONFIG_PROC_FS - .show = crypto_hash_show, -#endif -}; -EXPORT_SYMBOL_GPL(crypto_hash_type); - -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("Generic cryptographic hash type"); diff --git a/crypto/proc.c b/crypto/proc.c index 1c38733c224..58fef67d4f4 100644 --- a/crypto/proc.c +++ b/crypto/proc.c @@ -109,13 +109,6 @@ static int c_show(struct seq_file *m, void *p) seq_printf(m, "max keysize : %u\n", alg->cra_cipher.cia_max_keysize); break; - - case CRYPTO_ALG_TYPE_DIGEST: - seq_printf(m, "type : digest\n"); - seq_printf(m, "blocksize : %u\n", alg->cra_blocksize); - seq_printf(m, "digestsize : %u\n", - alg->cra_digest.dia_digestsize); - break; case CRYPTO_ALG_TYPE_COMPRESS: seq_printf(m, "type : compression\n"); break; diff --git a/crypto/testmgr.c b/crypto/testmgr.c index 6d5b746637b..7620bfce92f 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -1201,7 +1201,7 @@ static int test_cprng(struct crypto_rng *tfm, struct cprng_testvec *template, unsigned int tcount) { const char *algo = crypto_tfm_alg_driver_name(crypto_rng_tfm(tfm)); - int err, i, j, seedsize; + int err = 0, i, j, seedsize; u8 *seed; char result[32]; @@ -1942,6 +1942,15 @@ static const struct alg_test_desc alg_test_descs[] = { } } } + }, { + .alg = "ghash", + .test = alg_test_hash, + .suite = { + .hash = { + .vecs = ghash_tv_template, + .count = GHASH_TEST_VECTORS + } + } }, { .alg = "hmac(md5)", .test = alg_test_hash, diff --git a/crypto/testmgr.h b/crypto/testmgr.h index 9963b18983a..fb765173d41 100644 --- a/crypto/testmgr.h +++ b/crypto/testmgr.h @@ -1003,6 +1003,21 @@ static struct hash_testvec tgr128_tv_template[] = { }, }; +#define GHASH_TEST_VECTORS 1 + +static struct hash_testvec ghash_tv_template[] = +{ + { + + .key = "\xdf\xa6\xbf\x4d\xed\x81\xdb\x03\xff\xca\xff\x95\xf8\x30\xf0\x61", + .ksize = 16, + .plaintext = "\x95\x2b\x2a\x56\xa5\x60\x04a\xc0\xb3\x2b\x66\x56\xa0\x5b\x40\xb6", + .psize = 16, + .digest = "\xda\x53\xeb\x0a\xd2\xc5\x5b\xb6" + "\x4f\xc4\x80\x2c\xc3\xfe\xda\x60", + }, +}; + /* * HMAC-MD5 test vectors from RFC2202 * (These need to be fixed to not use strlen). diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c index 1573aebd54b..8b7d56a0fe3 100644 --- a/drivers/char/hw_random/core.c +++ b/drivers/char/hw_random/core.c @@ -52,7 +52,9 @@ static struct hwrng *current_rng; static LIST_HEAD(rng_list); static DEFINE_MUTEX(rng_mutex); - +static int data_avail; +static u8 rng_buffer[SMP_CACHE_BYTES < 32 ? 32 : SMP_CACHE_BYTES] + __cacheline_aligned; static inline int hwrng_init(struct hwrng *rng) { @@ -67,19 +69,6 @@ static inline void hwrng_cleanup(struct hwrng *rng) rng->cleanup(rng); } -static inline int hwrng_data_present(struct hwrng *rng, int wait) -{ - if (!rng->data_present) - return 1; - return rng->data_present(rng, wait); -} - -static inline int hwrng_data_read(struct hwrng *rng, u32 *data) -{ - return rng->data_read(rng, data); -} - - static int rng_dev_open(struct inode *inode, struct file *filp) { /* enforce read-only access to this chrdev */ @@ -91,54 +80,87 @@ static int rng_dev_open(struct inode *inode, struct file *filp) return 0; } +static inline int rng_get_data(struct hwrng *rng, u8 *buffer, size_t size, + int wait) { + int present; + + if (rng->read) + return rng->read(rng, (void *)buffer, size, wait); + + if (rng->data_present) + present = rng->data_present(rng, wait); + else + present = 1; + + if (present) + return rng->data_read(rng, (u32 *)buffer); + + return 0; +} + static ssize_t rng_dev_read(struct file *filp, char __user *buf, size_t size, loff_t *offp) { - u32 data; ssize_t ret = 0; int err = 0; - int bytes_read; + int bytes_read, len; while (size) { - err = -ERESTARTSYS; - if (mutex_lock_interruptible(&rng_mutex)) + if (mutex_lock_interruptible(&rng_mutex)) { + err = -ERESTARTSYS; goto out; + } + if (!current_rng) { - mutex_unlock(&rng_mutex); err = -ENODEV; - goto out; + goto out_unlock; + } + + if (!data_avail) { + bytes_read = rng_get_data(current_rng, rng_buffer, + sizeof(rng_buffer), + !(filp->f_flags & O_NONBLOCK)); + if (bytes_read < 0) { + err = bytes_read; + goto out_unlock; + } + data_avail = bytes_read; + } + + if (!data_avail) { + if (filp->f_flags & O_NONBLOCK) { + err = -EAGAIN; + goto out_unlock; + } + } else { + len = data_avail; + if (len > size) + len = size; + + data_avail -= len; + + if (copy_to_user(buf + ret, rng_buffer + data_avail, + len)) { + err = -EFAULT; + goto out_unlock; + } + + size -= len; + ret += len; } - bytes_read = 0; - if (hwrng_data_present(current_rng, - !(filp->f_flags & O_NONBLOCK))) - bytes_read = hwrng_data_read(current_rng, &data); mutex_unlock(&rng_mutex); - err = -EAGAIN; - if (!bytes_read && (filp->f_flags & O_NONBLOCK)) - goto out; - if (bytes_read < 0) { - err = bytes_read; - goto out; - } - - err = -EFAULT; - while (bytes_read && size) { - if (put_user((u8)data, buf++)) - goto out; - size--; - ret++; - bytes_read--; - data >>= 8; - } - if (need_resched()) schedule_timeout_interruptible(1); - err = -ERESTARTSYS; - if (signal_pending(current)) + + if (signal_pending(current)) { + err = -ERESTARTSYS; goto out; + } } +out_unlock: + mutex_unlock(&rng_mutex); out: return ret ? : err; } @@ -280,7 +302,7 @@ int hwrng_register(struct hwrng *rng) struct hwrng *old_rng, *tmp; if (rng->name == NULL || - rng->data_read == NULL) + (rng->data_read == NULL && rng->read == NULL)) goto out; mutex_lock(&rng_mutex); diff --git a/drivers/char/hw_random/virtio-rng.c b/drivers/char/hw_random/virtio-rng.c index 915157fcff9..bdaef8e9402 100644 --- a/drivers/char/hw_random/virtio-rng.c +++ b/drivers/char/hw_random/virtio-rng.c @@ -16,6 +16,7 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ + #include #include #include @@ -23,78 +24,64 @@ #include #include -/* The host will fill any buffer we give it with sweet, sweet randomness. We - * give it 64 bytes at a time, and the hwrng framework takes it 4 bytes at a - * time. */ -#define RANDOM_DATA_SIZE 64 - static struct virtqueue *vq; -static u32 *random_data; -static unsigned int data_left; +static unsigned int data_avail; static DECLARE_COMPLETION(have_data); +static bool busy; static void random_recv_done(struct virtqueue *vq) { - unsigned int len; - /* We can get spurious callbacks, e.g. shared IRQs + virtio_pci. */ - if (!vq->vq_ops->get_buf(vq, &len)) + if (!vq->vq_ops->get_buf(vq, &data_avail)) return; - data_left += len; complete(&have_data); } -static void register_buffer(void) +/* The host will fill any buffer we give it with sweet, sweet randomness. */ +static void register_buffer(u8 *buf, size_t size) { struct scatterlist sg; - sg_init_one(&sg, random_data+data_left, RANDOM_DATA_SIZE-data_left); + sg_init_one(&sg, buf, size); + /* There should always be room for one buffer. */ - if (vq->vq_ops->add_buf(vq, &sg, 0, 1, random_data) < 0) + if (vq->vq_ops->add_buf(vq, &sg, 0, 1, buf) < 0) BUG(); + vq->vq_ops->kick(vq); } -/* At least we don't udelay() in a loop like some other drivers. */ -static int virtio_data_present(struct hwrng *rng, int wait) +static int virtio_read(struct hwrng *rng, void *buf, size_t size, bool wait) { - if (data_left >= sizeof(u32)) - return 1; -again: + if (!busy) { + busy = true; + init_completion(&have_data); + register_buffer(buf, size); + } + if (!wait) return 0; wait_for_completion(&have_data); - /* Not enough? Re-register. */ - if (unlikely(data_left < sizeof(u32))) { - register_buffer(); - goto again; - } + busy = false; - return 1; + return data_avail; } -/* virtio_data_present() must have succeeded before this is called. */ -static int virtio_data_read(struct hwrng *rng, u32 *data) +static void virtio_cleanup(struct hwrng *rng) { - BUG_ON(data_left < sizeof(u32)); - data_left -= sizeof(u32); - *data = random_data[data_left / 4]; - - if (data_left < sizeof(u32)) { - init_completion(&have_data); - register_buffer(); - } - return sizeof(*data); + if (busy) + wait_for_completion(&have_data); } + static struct hwrng virtio_hwrng = { - .name = "virtio", - .data_present = virtio_data_present, - .data_read = virtio_data_read, + .name = "virtio", + .cleanup = virtio_cleanup, + .read = virtio_read, }; static int virtrng_probe(struct virtio_device *vdev) @@ -112,7 +99,6 @@ static int virtrng_probe(struct virtio_device *vdev) return err; } - register_buffer(); return 0; } @@ -138,21 +124,11 @@ static struct virtio_driver virtio_rng = { static int __init init(void) { - int err; - - random_data = kmalloc(RANDOM_DATA_SIZE, GFP_KERNEL); - if (!random_data) - return -ENOMEM; - - err = register_virtio_driver(&virtio_rng); - if (err) - kfree(random_data); - return err; + return register_virtio_driver(&virtio_rng); } static void __exit fini(void) { - kfree(random_data); unregister_virtio_driver(&virtio_rng); } module_init(init); diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h index 1ffb53f74d3..fc0d575c71e 100644 --- a/include/crypto/algapi.h +++ b/include/crypto/algapi.h @@ -106,7 +106,6 @@ struct blkcipher_walk { extern const struct crypto_type crypto_ablkcipher_type; extern const struct crypto_type crypto_aead_type; extern const struct crypto_type crypto_blkcipher_type; -extern const struct crypto_type crypto_hash_type; void crypto_mod_put(struct crypto_alg *alg); diff --git a/include/crypto/cryptd.h b/include/crypto/cryptd.h index 2f65a6e8ea4..1c96b255017 100644 --- a/include/crypto/cryptd.h +++ b/include/crypto/cryptd.h @@ -39,6 +39,7 @@ static inline struct cryptd_ahash *__cryptd_ahash_cast( struct cryptd_ahash *cryptd_alloc_ahash(const char *alg_name, u32 type, u32 mask); struct crypto_shash *cryptd_ahash_child(struct cryptd_ahash *tfm); +struct shash_desc *cryptd_shash_desc(struct ahash_request *req); void cryptd_free_ahash(struct cryptd_ahash *tfm); #endif diff --git a/include/linux/crypto.h b/include/linux/crypto.h index fd929889e8d..24d2e30f1b4 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -250,29 +250,6 @@ struct cipher_alg { void (*cia_decrypt)(struct crypto_tfm *tfm, u8 *dst, const u8 *src); }; -struct digest_alg { - unsigned int dia_digestsize; - void (*dia_init)(struct crypto_tfm *tfm); - void (*dia_update)(struct crypto_tfm *tfm, const u8 *data, - unsigned int len); - void (*dia_final)(struct crypto_tfm *tfm, u8 *out); - int (*dia_setkey)(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen); -}; - -struct hash_alg { - int (*init)(struct hash_desc *desc); - int (*update)(struct hash_desc *desc, struct scatterlist *sg, - unsigned int nbytes); - int (*final)(struct hash_desc *desc, u8 *out); - int (*digest)(struct hash_desc *desc, struct scatterlist *sg, - unsigned int nbytes, u8 *out); - int (*setkey)(struct crypto_hash *tfm, const u8 *key, - unsigned int keylen); - - unsigned int digestsize; -}; - struct compress_alg { int (*coa_compress)(struct crypto_tfm *tfm, const u8 *src, unsigned int slen, u8 *dst, unsigned int *dlen); @@ -293,8 +270,6 @@ struct rng_alg { #define cra_aead cra_u.aead #define cra_blkcipher cra_u.blkcipher #define cra_cipher cra_u.cipher -#define cra_digest cra_u.digest -#define cra_hash cra_u.hash #define cra_compress cra_u.compress #define cra_rng cra_u.rng @@ -320,8 +295,6 @@ struct crypto_alg { struct aead_alg aead; struct blkcipher_alg blkcipher; struct cipher_alg cipher; - struct digest_alg digest; - struct hash_alg hash; struct compress_alg compress; struct rng_alg rng; } cra_u; diff --git a/include/linux/hw_random.h b/include/linux/hw_random.h index 7244456e7e6..9bede7633f7 100644 --- a/include/linux/hw_random.h +++ b/include/linux/hw_random.h @@ -22,10 +22,12 @@ * @cleanup: Cleanup callback (can be NULL). * @data_present: Callback to determine if data is available * on the RNG. If NULL, it is assumed that - * there is always data available. + * there is always data available. *OBSOLETE* * @data_read: Read data from the RNG device. * Returns the number of lower random bytes in "data". - * Must not be NULL. + * Must not be NULL. *OSOLETE* + * @read: New API. drivers can fill up to max bytes of data + * into the buffer. The buffer is aligned for any type. * @priv: Private data, for use by the RNG driver. */ struct hwrng { @@ -34,6 +36,7 @@ struct hwrng { void (*cleanup)(struct hwrng *rng); int (*data_present)(struct hwrng *rng, int wait); int (*data_read)(struct hwrng *rng, u32 *data); + int (*read)(struct hwrng *rng, void *data, size_t max, bool wait); unsigned long priv; /* internal. */