From: Andre Noll Date: Tue, 20 Dec 2016 14:40:09 +0000 (+0100) Subject: Merge branch 'refs/heads/t/base64' X-Git-Tag: v0.5.7~20 X-Git-Url: http://git.tuebingen.mpg.de/?a=commitdiff_plain;h=78cc9c57ad9b1cc389956957030d37f71cb60a07;hp=904e302f0b64887f18c9e2fec7b0bb405675ad22;p=paraslash.git Merge branch 'refs/heads/t/base64' A couple of patches which move the base64 code to a separate file, and improve on it. Was cooking for several months. * refs/heads/t/base64: base64: Speed up decoder by using a table. base64: Use para_isspace() everywhere. base64: Trivial whitespace fixes. base64: Replace Pad64 variable by macro. base64: Remove unnecessary overflow checks. base64: Saner semantics for base64_decode() and uudecode(). Move base64 implementation to own file. --- diff --git a/NEWS.md b/NEWS.md index afc7f105..b0862d5f 100644 --- a/NEWS.md +++ b/NEWS.md @@ -4,7 +4,7 @@ NEWS ------------------------------------------ 0.5.7 (to be announced) "semantic density" ------------------------------------------ - +- Speedup of the base64 decoder. - One of the two source browsers has been removed from the web pages. The doxygen API reference still contains an HTML version of each source file. diff --git a/base64.c b/base64.c new file mode 100644 index 00000000..7b8fe292 --- /dev/null +++ b/base64.c @@ -0,0 +1,205 @@ +/* + * The code in this file was taken from openssh-5.2p1, Copyright (c) 1996 by + * Internet Software Consortium. Portions Copyright (c) 1995 by International + * Business Machines, Inc. + */ + +/** \file base64.c Uudecode and base64decode implementation. 
*/ + +#include + +#include "para.h" +#include "error.h" +#include "base64.h" +#include "string.h" + +static const char Base64[] = + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; +static const unsigned char base64_tab[256] = { + 255, 255, 255, 255, 255, 255, 255, 255, /* 00-07 */ + 255, 255, 255, 255, 255, 255, 255, 255, /* 08-0f */ + 255, 255, 255, 255, 255, 255, 255, 255, /* 10-17 */ + 255, 255, 255, 255, 255, 255, 255, 255, /* 18-1f */ + 255, 255, 255, 255, 255, 255, 255, 255, /* 20-27 */ + 255, 255, 255, 62, 255, 255, 255, 63, /* 28-2f */ + 52 , 53, 54, 55, 56, 57, 58, 59, /* 30-37 */ + 60 , 61, 255, 255, 255, 255, 255, 255, /* 38-3f */ + 255, 0, 1, 2, 3, 4, 5, 6, /* 40-47 */ + 7 , 8, 9, 10, 11, 12, 13, 14, /* 48-4f */ + 15 , 16, 17, 18, 19, 20, 21, 22, /* 50-57 */ + 23 , 24, 25, 255, 255, 255, 255, 255, /* 58-5f */ + 255, 26, 27, 28, 29, 30, 31, 32, /* 60-67 */ + 33 , 34, 35, 36, 37, 38, 39, 40, /* 68-6f */ + 41 , 42, 43, 44, 45, 46, 47, 48, /* 70-77 */ + 49 , 50, 51, 255, 255, 255, 255, 255, /* 78-7f */ + 255, 255, 255, 255, 255, 255, 255, 255, /* 80-87 */ + 255, 255, 255, 255, 255, 255, 255, 255, /* 88-8f */ + 255, 255, 255, 255, 255, 255, 255, 255, /* 90-97 */ + 255, 255, 255, 255, 255, 255, 255, 255, /* 98-9f */ + 255, 255, 255, 255, 255, 255, 255, 255, /* a0-a7 */ + 255, 255, 255, 255, 255, 255, 255, 255, /* a8-af */ + 255, 255, 255, 255, 255, 255, 255, 255, /* b0-b7 */ + 255, 255, 255, 255, 255, 255, 255, 255, /* b8-bf */ + 255, 255, 255, 255, 255, 255, 255, 255, /* c0-c7 */ + 255, 255, 255, 255, 255, 255, 255, 255, /* c8-cf */ + 255, 255, 255, 255, 255, 255, 255, 255, /* d0-d7 */ + 255, 255, 255, 255, 255, 255, 255, 255, /* d8-df */ + 255, 255, 255, 255, 255, 255, 255, 255, /* e0-e7 */ + 255, 255, 255, 255, 255, 255, 255, 255, /* e8-ef */ + 255, 255, 255, 255, 255, 255, 255, 255, /* f0-f7 */ + 255, 255, 255, 255, 255, 255, 255, 255, /* f8-ff */ +}; + +/** Maximal possible size of the decoded data. 
*/ +#define BASE64_MAX_DECODED_SIZE(_encoded_size) ((_encoded_size) / 4 * 3) + +#define PAD64 '=' +/** + * base64-decode a buffer. + * + * \param src The buffer to decode. + * \param encoded_size The special value -1 means: look for terminating zero byte. + * \param result Points to dynamically allocated target buffer on success. + * \param decoded_size Number of bytes written to \a result. + * + * Skips all whitespace anywhere. Converts characters, four at a time, starting + * at (or after) src from base - 64 numbers into three 8 bit bytes in the + * target area. + * + * It is OK to pass a \p NULL pointer as \a decoded_size. The result is + * terminated with a zero byte. + * + * \return Standard. The contents of result \a and \a decoded_size are + * undefined on failure. + */ +int base64_decode(char const *src, size_t encoded_size, char **result, + size_t *decoded_size) +{ + size_t i, j, state; /* source/target indices */ + const char *end = src + encoded_size, *p; + unsigned char *target, uch; + + if (encoded_size == (size_t)-1) + encoded_size = strlen(src); + target = para_malloc(BASE64_MAX_DECODED_SIZE(encoded_size) + 1); + + for ( + i = 0, j = 0, state = 0; + i < encoded_size && (uch = src[i]) != '\0'; + i++ + ) { + if (para_isspace(uch)) /* Skip whitespace anywhere. */ + continue; + if (uch == PAD64) + break; + if (base64_tab[uch] == 255) /* A non-base64 character. */ + goto fail; + uch = base64_tab[uch]; + switch (state) { + case 0: + target[j] = uch << 2; + break; + case 1: + target[j] |= uch >> 4; + j++; + target[j] = (uch & 0x0f) << 4; + break; + case 2: + target[j] |= uch >> 2; + j++; + target[j] = (uch & 0x03) << 6; + break; + case 3: + target[j] |= uch; + j++; + break; + } + state = (state + 1) % 4; + } + p = (i < encoded_size)? src + i : NULL; + /* + * We are done decoding Base-64 chars. Let's see if we ended + * on a byte boundary, and/or with erroneous trailing characters. + */ + if (p && *p == PAD64) { /* We got a pad char. Skip it, get next. 
*/ + p++; + switch (state) { + case 0: /* Invalid = in first position */ + case 1: /* Invalid = in second position */ + goto fail; + + case 2: /* Valid, means one byte of info */ + /* Skip any number of spaces. */ + for (; p < end && *p != '\0'; p++) + if (!para_isspace(*p)) + break; + /* Make sure there is another trailing = sign. */ + if (*p != PAD64) + goto fail; + /* Fall through to "single trailing =" case. */ + p++; + + case 3: /* Valid, means two bytes of info */ + /* + * We know this char is an =. Is there anything but + * whitespace after it? + */ + for (; p < end && *p != '\0'; p++) + if (!para_isspace(*p)) + goto fail; + /* + * Now make sure for cases 2 and 3 that the "extra" + * bits that slopped past the last full byte were + * zeros. If we don't check them, they become a + * subliminal channel. + */ + if (target[j] != 0) + goto fail; + } + } else { + /* + * We ended by seeing the end of the string. Make sure we + * have no partial bytes lying around. + */ + if (state != 0) + goto fail; + } + /* success */ + target[j] = '\0'; /* just to be sure */ + if (decoded_size) + *decoded_size = j; + *result = (char *)target; + return 1; +fail: + free(target); + return -E_BASE64; +} + +/** + * Decode a buffer using the uuencode Base64 algorithm. + * + * \param src The buffer to decode. + * \param encoded_size Number of input bytes in the source buffer. + * \param result Contains the decoded data on success. + * \param decoded_size Number of output bytes on success. + * + * This is just a simple wrapper for \ref base64_decode() which strips + * whitespace. + * + * \return The return value of the underlying call to \ref base64_decode(). + * + * \sa uuencode(1), uudecode(1). 
+ */ +int uudecode(char const *src, size_t encoded_size, char **result, + size_t *decoded_size) +{ + const char *end = src + encoded_size, *p; + + /* Advance p over leading spaces and tabs, then over the run of data up to the next space, tab, zero byte or the end of the buffer. */ + for (p = src; p < end && (*p == ' ' || *p == '\t'); p++) + ; + for (; p < end && *p != '\0' && *p != ' ' && *p != '\t'; p++) + ; + /* Decode only the prefix [src, p): anything after the data is not passed to base64_decode(). */ + return base64_decode(src, p - src, result, decoded_size); +} diff --git a/base64.h b/base64.h new file mode 100644 index 00000000..4bfaa99d --- /dev/null +++ b/base64.h @@ -0,0 +1,4 @@ +int uudecode(char const *src, size_t encoded_size, char **result, + size_t *decoded_size); +int base64_decode(char const *src, size_t encoded_size, char **result, + size_t *decoded_size); diff --git a/configure.ac b/configure.ac index f9115fd6..8d4ce59a 100644 --- a/configure.ac +++ b/configure.ac @@ -418,6 +418,7 @@ if test -n "$CRYPTOLIB" && test $HAVE_OSL = yes; then close_on_fork mm crypt_common + base64 ipc dccp_send fd @@ -480,6 +481,7 @@ if test -n "$CRYPTOLIB"; then client_common buffer_tree crypt_common + base64 version ggo " @@ -523,6 +525,7 @@ if test -n "$CRYPTOLIB"; then stat net crypt_common + base64 sideband time grab_client diff --git a/crypt.c b/crypt.c index 610d2057..f227eb39 100644 --- a/crypt.c +++ b/crypt.c @@ -23,6 +23,7 @@ #include "crypt.h" #include "fd.h" #include "crypt_backend.h" +#include "base64.h" struct asymmetric_key { RSA *rsa; @@ -158,7 +159,7 @@ int get_asymmetric_key(const char *key_file, int private, struct asymmetric_key *key = NULL; void *map = NULL; unsigned char *blob = NULL; - size_t map_size, blob_size, decoded_size; + size_t map_size, encoded_size, decoded_size; int ret, ret2; char *cp; @@ -180,16 +181,11 @@ int get_asymmetric_key(const char *key_file, int private, goto out; } cp = map + ret; + encoded_size = map_size - ret; PARA_INFO_LOG("decoding public rsa-ssh key %s\n", key_file); - ret = -ERRNO_TO_PARA_ERROR(EOVERFLOW); - if (map_size > INT_MAX / 4) - goto 
out_unmap; - blob_size = 2 * map_size; - blob = para_malloc(blob_size); - ret = uudecode(cp, blob, blob_size); + ret = uudecode(cp, encoded_size, (char **)&blob, &decoded_size); if (ret < 0) goto out_unmap; - decoded_size = ret; ret = check_ssh_key_header(blob, decoded_size); if (ret < 0) goto out_unmap; diff --git a/crypt_backend.h b/crypt_backend.h index 06c86d74..f9a69d94 100644 --- a/crypt_backend.h +++ b/crypt_backend.h @@ -13,7 +13,5 @@ size_t is_ssh_rsa_key(char *data, size_t size); uint32_t read_ssh_u32(const void *vp); -int uudecode(const char *src, unsigned char *target, size_t targsize); int check_ssh_key_header(const unsigned char *blob, int blen); int check_key_file(const char *file, bool private_key); -int base64_decode(char const *src, unsigned char *target, size_t targsize); diff --git a/crypt_common.c b/crypt_common.c index 022692ad..b39ee5e4 100644 --- a/crypt_common.c +++ b/crypt_common.c @@ -45,166 +45,6 @@ size_t is_ssh_rsa_key(char *data, size_t size) return cp - data; } -/* - * This base64/uudecode stuff below is taken from openssh-5.2p1, Copyright (c) - * 1996 by Internet Software Consortium. Portions Copyright (c) 1995 by - * International Business Machines, Inc. - */ - -static const char Base64[] = - "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; -static const char Pad64 = '='; - -/** - * base64-decode a buffer. - * - * \param src The buffer to decode. - * \param target Result is stored here. - * \param targsize Number of bytes of \a target. - * - * Skips all whitespace anywhere. Converts characters, four at a time, starting - * at (or after) src from base - 64 numbers into three 8 bit bytes in the - * target area. - * - * \return The number of data bytes stored at the target, -E_BASE64 on errors. 
- */ -int base64_decode(char const *src, unsigned char *target, size_t targsize) -{ - unsigned int tarindex, state; - int ch; - char *pos; - - state = 0; - tarindex = 0; - - while ((ch = *src++) != '\0') { - if (para_isspace(ch)) /* Skip whitespace anywhere. */ - continue; - - if (ch == Pad64) - break; - - pos = strchr(Base64, ch); - if (pos == NULL) /* A non-base64 character. */ - return -E_BASE64; - - switch (state) { - case 0: - if (tarindex >= targsize) - return -E_BASE64; - target[tarindex] = (pos - Base64) << 2; - state = 1; - break; - case 1: - if (tarindex + 1 >= targsize) - return -E_BASE64; - target[tarindex] |= (pos - Base64) >> 4; - target[tarindex + 1] = ((pos - Base64) & 0x0f) << 4; - tarindex++; - state = 2; - break; - case 2: - if (tarindex + 1 >= targsize) - return -E_BASE64; - target[tarindex] |= (pos - Base64) >> 2; - target[tarindex + 1] = ((pos - Base64) & 0x03) << 6; - tarindex++; - state = 3; - break; - case 3: - if (tarindex >= targsize) - return -E_BASE64; - target[tarindex] |= pos - Base64; - tarindex++; - state = 0; - break; - } - } - - /* - * We are done decoding Base-64 chars. Let's see if we ended - * on a byte boundary, and/or with erroneous trailing characters. - */ - - if (ch == Pad64) { /* We got a pad char. */ - ch = *src++; /* Skip it, get next. */ - switch (state) { - case 0: /* Invalid = in first position */ - case 1: /* Invalid = in second position */ - return -E_BASE64; - - case 2: /* Valid, means one byte of info */ - /* Skip any number of spaces. */ - for (; ch != '\0'; ch = *src++) - if (!isspace(ch)) - break; - /* Make sure there is another trailing = sign. */ - if (ch != Pad64) - return -E_BASE64; - ch = *src++; /* Skip the = */ - /* Fall through to "single trailing =" case. */ - /* FALLTHROUGH */ - - case 3: /* Valid, means two bytes of info */ - /* - * We know this char is an =. Is there anything but - * whitespace after it? 
- */ - for (; ch != '\0'; ch = *src++) - if (!isspace(ch)) - return -E_BASE64; - - /* - * Now make sure for cases 2 and 3 that the "extra" - * bits that slopped past the last full byte were - * zeros. If we don't check them, they become a - * subliminal channel. - */ - if (target[tarindex] != 0) - return -E_BASE64; - } - } else { - /* - * We ended by seeing the end of the string. Make sure we - * have no partial bytes lying around. - */ - if (state != 0) - return -E_BASE64; - } - - return tarindex; -} - -/** - * uudecode a buffer. - * - * \param src The buffer to decode. - * \param target Result buffer. - * \param targsize The length of \a target in bytes. - * - * This is just a simple wrapper for base64_decode() which strips whitespace. - * - * \return The return value of the underlying call to base64_decode(). - */ -int uudecode(const char *src, unsigned char *target, size_t targsize) -{ - int len; - char *encoded, *p; - - /* copy the 'readonly' source */ - encoded = para_strdup(src); - /* skip whitespace and data */ - for (p = encoded; *p == ' ' || *p == '\t'; p++) - ; - for (; *p != '\0' && *p != ' ' && *p != '\t'; p++) - ; - /* and remove trailing whitespace because base64_decode needs this */ - *p = '\0'; - len = base64_decode(encoded, target, targsize); - free(encoded); - return len; -} - /** * Read a 4-byte number from a buffer in big-endian format. 
* diff --git a/error.h b/error.h index 3fda5787..ff85c8d1 100644 --- a/error.h +++ b/error.h @@ -439,9 +439,10 @@ extern const char **para_errlist[]; #define CRYPT_COMMON_ERRORS \ PARA_ERROR(SSH_KEY_HEADER, "ssh key header not found"), \ - PARA_ERROR(BASE64, "failed to base64-decode ssh public key"), \ PARA_ERROR(KEY_PERM, "unprotected private key"), \ +#define BASE64_ERRORS \ + PARA_ERROR(BASE64, "base64 decode error"), \ #define CRYPT_ERRORS \ PARA_ERROR(PRIVATE_KEY, "can not read private key"), \ diff --git a/gcrypt.c b/gcrypt.c index 3c6c1ad1..289748e8 100644 --- a/gcrypt.c +++ b/gcrypt.c @@ -15,6 +15,7 @@ #include "crypt.h" #include "crypt_backend.h" #include "fd.h" +#include "base64.h" //#define GCRYPT_DEBUG 1 @@ -239,12 +240,11 @@ static int decode_key(const char *key_file, const char *header_str, key[j++] = begin[i]; } key[j] = '\0'; - blob_size = key_size * 2; - blob = para_malloc(blob_size); - ret = base64_decode(key, blob, blob_size); + ret = base64_decode(key, j, (char **)&blob, &blob_size); free(key); if (ret < 0) goto free_unmap; + ret = blob_size; goto unmap; free_unmap: free(blob); @@ -606,13 +606,9 @@ static int get_ssh_public_key(unsigned char *data, int size, gcry_sexp_t *result gcry_mpi_t e = NULL, n = NULL; PARA_DEBUG_LOG("decoding %d byte public rsa-ssh key\n", size); - if (size > INT_MAX / 4) - return -ERRNO_TO_PARA_ERROR(EOVERFLOW); - blob = para_malloc(2 * size); - ret = uudecode((char *)data, blob, 2 * size); + ret = uudecode((char *)data, size, (char **)&blob, &decoded_size); if (ret < 0) goto free_blob; - decoded_size = ret; end = blob + decoded_size; dump_buffer("decoded key", blob, decoded_size); ret = check_ssh_key_header(blob, decoded_size);