You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mynewt.apache.org by ut...@apache.org on 2020/03/04 17:34:24 UTC
[mynewt-core] 03/03: crypto: use 32-bit XOR in CBC/CTR
This is an automated email from the ASF dual-hosted git repository.
utzig pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/mynewt-core.git
commit 6897f5d37b25534ba2282dfc674831082c6534c9
Author: Fabio Utzig <ut...@apache.org>
AuthorDate: Fri Jan 31 10:00:19 2020 -0300
crypto: use 32-bit XOR in CBC/CTR
Using 32-bit XORs is around 10% faster with a slightly increased code size
(for an optimized target). CBC always requires AES-block-length buffers,
so it simply employs 32-bit XORs. CTR, due to its stream semantics, uses a
32-bit XOR when the buffer is exactly one AES block in size and otherwise
falls back to a byte-by-byte XOR.
Signed-off-by: Fabio Utzig <ut...@apache.org>
---
hw/drivers/crypto/src/crypto.c | 69 +++++++++++++++++++++++++++++++++++++-----
1 file changed, 62 insertions(+), 7 deletions(-)
diff --git a/hw/drivers/crypto/src/crypto.c b/hw/drivers/crypto/src/crypto.c
index 638b032..8409cee 100644
--- a/hw/drivers/crypto/src/crypto.c
+++ b/hw/drivers/crypto/src/crypto.c
@@ -19,6 +19,8 @@
#include "crypto/crypto.h"
+#include <stdint.h>
+
/*
* Implement modes using ECB for non-available HW support
*/
@@ -35,6 +37,12 @@ crypto_do_ctr(struct crypto_dev *crypto, const void *key, uint16_t keylen,
uint8_t *inbuf8 = (uint8_t *)inbuf;
uint8_t _nonce[AES_BLOCK_LEN];
uint8_t _out[AES_BLOCK_LEN];
+#if defined(__ARM_FEATURE_UNALIGNED)
+ /* accelerate operations doing 32-bit XORs */
+ uint32_t *outbuf32;
+ uint32_t *inbuf32;
+ uint32_t *_out32 = (uint32_t *)_out;
+#endif
int rc;
if (crypto->interface.encrypt == NULL) {
@@ -56,9 +64,25 @@ crypto_do_ctr(struct crypto_dev *crypto, const void *key, uint16_t keylen,
return sz + rc;
}
- for (i = 0; i < len; i++) {
- outbuf8[i] = inbuf8[i] ^ _out[i];
+#if defined(__ARM_FEATURE_UNALIGNED)
+ /*
+ * For full blocks increase speed by doing 32-bit XOR; maintain the
+ * stream semantics doing byte XORs for smaller sizes (end of buffer).
+ */
+ if (len == AES_BLOCK_LEN) {
+ inbuf32 = (uint32_t *)inbuf8;
+ outbuf32 = (uint32_t *)outbuf8;
+ for (i = 0; i < len / 4; i++) {
+ outbuf32[i] = inbuf32[i] ^ _out32[i];
+ }
+ } else {
+#endif
+ for (i = 0; i < len; i++) {
+ outbuf8[i] = inbuf8[i] ^ _out[i];
+ }
+#if defined(__ARM_FEATURE_UNALIGNED)
}
+#endif
for (i = AES_BLOCK_LEN; i > 0; --i) {
if (++_nonce[i - 1] != 0) {
@@ -86,12 +110,18 @@ crypto_do_cbc(struct crypto_dev *crypto, uint8_t op, const void *key,
size_t remain;
uint32_t i;
uint32_t j;
- uint8_t tmp[AES_BLOCK_LEN];
const uint8_t *ivp;
uint8_t iv_save[AES_BLOCK_LEN * 2];
uint8_t ivpos;
uint8_t *outbuf8 = (uint8_t *)outbuf;
const uint8_t *inbuf8 = (const uint8_t *)inbuf;
+#if defined(__ARM_FEATURE_UNALIGNED)
+ uint32_t tmp32[AES_BLOCK_LEN / 4];
+ uint32_t *outbuf32 = (uint32_t *)outbuf;
+ const uint32_t *inbuf32 = (uint32_t *)inbuf;
+#else
+ uint8_t tmp[AES_BLOCK_LEN];
+#endif
bool inplace;
int rc;
@@ -118,13 +148,25 @@ crypto_do_cbc(struct crypto_dev *crypto, uint8_t op, const void *key,
}
if (op == CRYPTO_OP_ENCRYPT) {
+#if defined(__ARM_FEATURE_UNALIGNED)
+ for (j = 0; j < AES_BLOCK_LEN / 4; j++) {
+ tmp32[j] = ((uint32_t *)ivp)[j] ^ inbuf32[(i / 4) + j];
+ }
+#else
for (j = 0; j < AES_BLOCK_LEN; j++) {
tmp[j] = ivp[j] ^ inbuf8[j+i];
}
+#endif
rc = crypto->interface.encrypt(crypto, CRYPTO_ALGO_AES,
- CRYPTO_MODE_ECB, (const uint8_t *)key, keylen, NULL, tmp,
- &outbuf8[i], AES_BLOCK_LEN);
+ CRYPTO_MODE_ECB,
+ (const uint8_t *)key, keylen, NULL,
+#if defined(__ARM_FEATURE_UNALIGNED)
+ (uint8_t *)tmp32,
+#else
+ tmp,
+#endif
+ &outbuf8[i], AES_BLOCK_LEN);
if (rc != AES_BLOCK_LEN) {
return rc;
}
@@ -132,8 +174,15 @@ crypto_do_cbc(struct crypto_dev *crypto, uint8_t op, const void *key,
ivp = &outbuf8[i];
} else {
rc = crypto->interface.decrypt(crypto, CRYPTO_ALGO_AES,
- CRYPTO_MODE_ECB, (const uint8_t *)key, keylen, NULL,
- &inbuf8[i], tmp, AES_BLOCK_LEN);
+ CRYPTO_MODE_ECB,
+ (const uint8_t *)key, keylen, NULL,
+ &inbuf8[i],
+#if defined(__ARM_FEATURE_UNALIGNED)
+ (uint8_t *)tmp32,
+#else
+ tmp,
+#endif
+ AES_BLOCK_LEN);
if (rc != AES_BLOCK_LEN) {
return rc;
}
@@ -142,9 +191,15 @@ crypto_do_cbc(struct crypto_dev *crypto, uint8_t op, const void *key,
memcpy(&iv_save[ivpos], &inbuf8[i], AES_BLOCK_LEN);
}
+#if defined(__ARM_FEATURE_UNALIGNED)
+ for (j = 0; j < AES_BLOCK_LEN / 4; j++) {
+ outbuf32[(i / 4) + j] = ((uint32_t *)ivp)[j] ^ tmp32[j];
+ }
+#else
for (j = 0; j < AES_BLOCK_LEN; j++) {
outbuf8[i+j] = ivp[j] ^ tmp[j];
}
+#endif
if (inplace) {
ivp = &iv_save[ivpos];