From d865486282660af20194463e8e36a6d84ab579c8 Mon Sep 17 00:00:00 2001 From: Andre Noll Date: Sat, 9 Apr 2016 20:44:28 +0200 Subject: [PATCH] base64: Speed up decoder by using a table. The current implementation calls strchr() for each character in the encoded data to find the offset in the Base64[] array that corresponds to six decoded bits. This makes the algorithm scale poorly. This commit introduces a lookup table of size 256 which simplifies the code and improves the performance of the decoder. --- base64.c | 111 ++++++++++++++++++++++++++++++++++--------------------- 1 file changed, 69 insertions(+), 42 deletions(-) diff --git a/base64.c b/base64.c index cee51571..7b8fe292 100644 --- a/base64.c +++ b/base64.c @@ -15,6 +15,40 @@ static const char Base64[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; +static const unsigned char base64_tab[256] = { + 255, 255, 255, 255, 255, 255, 255, 255, /* 00-07 */ + 255, 255, 255, 255, 255, 255, 255, 255, /* 08-0f */ + 255, 255, 255, 255, 255, 255, 255, 255, /* 10-17 */ + 255, 255, 255, 255, 255, 255, 255, 255, /* 18-1f */ + 255, 255, 255, 255, 255, 255, 255, 255, /* 20-27 */ + 255, 255, 255, 62, 255, 255, 255, 63, /* 28-2f */ + 52 , 53, 54, 55, 56, 57, 58, 59, /* 30-37 */ + 60 , 61, 255, 255, 255, 255, 255, 255, /* 38-3f */ + 255, 0, 1, 2, 3, 4, 5, 6, /* 40-47 */ + 7 , 8, 9, 10, 11, 12, 13, 14, /* 48-4f */ + 15 , 16, 17, 18, 19, 20, 21, 22, /* 50-57 */ + 23 , 24, 25, 255, 255, 255, 255, 255, /* 58-5f */ + 255, 26, 27, 28, 29, 30, 31, 32, /* 60-67 */ + 33 , 34, 35, 36, 37, 38, 39, 40, /* 68-6f */ + 41 , 42, 43, 44, 45, 46, 47, 48, /* 70-77 */ + 49 , 50, 51, 255, 255, 255, 255, 255, /* 78-7f */ + 255, 255, 255, 255, 255, 255, 255, 255, /* 80-87 */ + 255, 255, 255, 255, 255, 255, 255, 255, /* 88-8f */ + 255, 255, 255, 255, 255, 255, 255, 255, /* 90-97 */ + 255, 255, 255, 255, 255, 255, 255, 255, /* 98-9f */ + 255, 255, 255, 255, 255, 255, 255, 255, /* a0-a7 */ + 255, 255, 255, 255, 255, 255, 255, 255, 
/* a8-af */ + 255, 255, 255, 255, 255, 255, 255, 255, /* b0-b7 */ + 255, 255, 255, 255, 255, 255, 255, 255, /* b8-bf */ + 255, 255, 255, 255, 255, 255, 255, 255, /* c0-c7 */ + 255, 255, 255, 255, 255, 255, 255, 255, /* c8-cf */ + 255, 255, 255, 255, 255, 255, 255, 255, /* d0-d7 */ + 255, 255, 255, 255, 255, 255, 255, 255, /* d8-df */ + 255, 255, 255, 255, 255, 255, 255, 255, /* e0-e7 */ + 255, 255, 255, 255, 255, 255, 255, 255, /* e8-ef */ + 255, 255, 255, 255, 255, 255, 255, 255, /* f0-f7 */ + 255, 255, 255, 255, 255, 255, 255, 255, /* f8-ff */ +}; /** Maximal possible size of the decoded data. */ #define BASE64_MAX_DECODED_SIZE(_encoded_size) ((_encoded_size) / 4 * 3) @@ -41,61 +75,54 @@ static const char Base64[] = int base64_decode(char const *src, size_t encoded_size, char **result, size_t *decoded_size) { - unsigned int tarindex, state; - int ch; - char *pos; - const char *end = src + encoded_size; - unsigned char *target; + size_t i, j, state; /* source/target indices */ + const char *end = src + encoded_size, *p; + unsigned char *target, uch; if (encoded_size == (size_t)-1) encoded_size = strlen(src); target = para_malloc(BASE64_MAX_DECODED_SIZE(encoded_size) + 1); - state = 0; - tarindex = 0; - - while (src < end) { - ch = *src++; - if (para_isspace(ch)) /* Skip whitespace anywhere. */ + for ( + i = 0, j = 0, state = 0; + i < encoded_size && (uch = src[i]) != '\0'; + i++ + ) { + if (para_isspace(uch)) /* Skip whitespace anywhere. */ continue; - - if (ch == PAD64) + if (uch == PAD64) break; - - pos = strchr(Base64, ch); - if (pos == NULL) /* A non-base64 character. */ + if (base64_tab[uch] == 255) /* A non-base64 character. 
*/ goto fail; - + uch = base64_tab[uch]; switch (state) { case 0: - target[tarindex] = (pos - Base64) << 2; - state = 1; + target[j] = uch << 2; break; case 1: - target[tarindex] |= (pos - Base64) >> 4; - target[tarindex + 1] = ((pos - Base64) & 0x0f) << 4; - tarindex++; - state = 2; + target[j] |= uch >> 4; + j++; + target[j] = (uch & 0x0f) << 4; break; case 2: - target[tarindex] |= (pos - Base64) >> 2; - target[tarindex + 1] = ((pos - Base64) & 0x03) << 6; - tarindex++; - state = 3; + target[j] |= uch >> 2; + j++; + target[j] = (uch & 0x03) << 6; break; case 3: - target[tarindex] |= pos - Base64; - tarindex++; - state = 0; + target[j] |= uch; + j++; break; } + state = (state + 1) % 4; } + p = (i < encoded_size)? src + i : NULL; /* * We are done decoding Base-64 chars. Let's see if we ended * on a byte boundary, and/or with erroneous trailing characters. */ - if (*src == PAD64) { /* We got a pad char. */ - ch = *src++; /* Skip it, get next. */ + if (p && *p == PAD64) { /* We got a pad char. Skip it, get next. */ + p++; switch (state) { case 0: /* Invalid = in first position */ case 1: /* Invalid = in second position */ @@ -103,22 +130,22 @@ int base64_decode(char const *src, size_t encoded_size, char **result, case 2: /* Valid, means one byte of info */ /* Skip any number of spaces. */ - for (; ch != '\0'; ch = *src++) - if (!para_isspace(ch)) + for (; p < end && *p != '\0'; p++) + if (!para_isspace(*p)) break; /* Make sure there is another trailing = sign. */ - if (ch != PAD64) + if (*p != PAD64) goto fail; - ch = *src++; /* Skip the = */ /* Fall through to "single trailing =" case. */ + p++; case 3: /* Valid, means two bytes of info */ /* * We know this char is an =. Is there anything but * whitespace after it? 
*/ - for (; ch != '\0'; ch = *src++) - if (!para_isspace(ch)) + for (; p < end && *p != '\0'; p++) + if (!para_isspace(*p)) goto fail; /* * Now make sure for cases 2 and 3 that the "extra" @@ -126,7 +153,7 @@ int base64_decode(char const *src, size_t encoded_size, char **result, * zeros. If we don't check them, they become a * subliminal channel. */ - if (target[tarindex] != 0) + if (target[j] != 0) goto fail; } } else { @@ -138,10 +165,10 @@ int base64_decode(char const *src, size_t encoded_size, char **result, goto fail; } /* success */ - target[tarindex] = '\0'; /* just to be sure */ - *result = (char *)target; + target[j] = '\0'; /* just to be sure */ if (decoded_size) - *decoded_size = tarindex; + *decoded_size = j; + *result = (char *)target; return 1; fail: free(target); -- 2.30.2