]> git.tuebingen.mpg.de Git - paraslash.git/commitdiff
Merge branch 'refs/heads/t/base64'
authorAndre Noll <maan@tuebingen.mpg.de>
Tue, 20 Dec 2016 14:40:09 +0000 (15:40 +0100)
committerAndre Noll <maan@tuebingen.mpg.de>
Tue, 20 Dec 2016 14:42:23 +0000 (15:42 +0100)
A couple of patches which move the base64 code to a separate file,
and improve on it. Was cooking for several months.

* refs/heads/t/base64:
  base64: Speed up decoder by using a table.
  base64: Use para_isspace() everywhere.
  base64: Trivial whitespace fixes.
  base64: Replace Pad64 variable by macro.
  base64: Remove unnecessary overflow checks.
  base64: Saner semantics for base64_decode() and uudecode().
  Move base64 implementation to own file.

NEWS.md
base64.c [new file with mode: 0644]
base64.h [new file with mode: 0644]
configure.ac
crypt.c
crypt_backend.h
crypt_common.c
error.h
gcrypt.c

diff --git a/NEWS.md b/NEWS.md
index afc7f1054d9df40371a6594255c6e47b4baf356b..b0862d5f392ee33504255eab1d0d43e3ee157067 100644 (file)
--- a/NEWS.md
+++ b/NEWS.md
@@ -4,7 +4,7 @@ NEWS
 ------------------------------------------
 0.5.7 (to be announced) "semantic density"
 ------------------------------------------
-
+- Speedup of the base64 decoder.
 - One of the two source browsers has been removed from the web pages.
   The doxygen API reference still contains an HTML version of each
   source file.
diff --git a/base64.c b/base64.c
new file mode 100644 (file)
index 0000000..7b8fe29
--- /dev/null
+++ b/base64.c
@@ -0,0 +1,205 @@
+/*
+ * The code in this file was taken from openssh-5.2p1, Copyright (c) 1996 by
+ * Internet Software Consortium.  Portions Copyright (c) 1995 by International
+ * Business Machines, Inc.
+ */
+
+/** \file base64.c Uudecode and base64decode implementation. */
+
+#include <regex.h>
+
+#include "para.h"
+#include "error.h"
+#include "base64.h"
+#include "string.h"
+
+static const char Base64[] =
+       "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
+static const unsigned char base64_tab[256] = {
+       255, 255, 255, 255, 255, 255, 255, 255, /* 00-07 */
+       255, 255, 255, 255, 255, 255, 255, 255, /* 08-0f */
+       255, 255, 255, 255, 255, 255, 255, 255, /* 10-17 */
+       255, 255, 255, 255, 255, 255, 255, 255, /* 18-1f */
+       255, 255, 255, 255, 255, 255, 255, 255, /* 20-2f */
+       255, 255, 255,  62, 255, 255, 255,  63, /* 28-2f */
+       52 ,  53,  54,  55,  56,  57,  58,  59, /* 30-37 */
+       60 ,  61, 255, 255, 255, 255, 255, 255, /* 38-3f */
+       255,   0,   1,   2,   3,   4,   5,   6, /* 40-47 */
+       7  ,   8,   9,  10,  11,  12,  13,  14, /* 48-4f */
+       15 ,  16,  17,  18,  19,  20,  21,  22, /* 50-57 */
+       23 ,  24,  25, 255, 255, 255, 255, 255, /* 58-5f */
+       255,  26,  27,  28,  29,  30,  31,  32, /* 60-6f */
+       33 ,  34,  35,  36,  37,  38,  39,  40, /* 68-6f */
+       41 ,  42,  43,  44,  45,  46,  47,  48, /* 70-77 */
+       49 ,  50,  51, 255, 255, 255, 255, 255, /* 78-7f */
+       255, 255, 255, 255, 255, 255, 255, 255, /* 80-87 */
+       255, 255, 255, 255, 255, 255, 255, 255, /* 88-8f */
+       255, 255, 255, 255, 255, 255, 255, 255, /* 90-97 */
+       255, 255, 255, 255, 255, 255, 255, 255, /* 98-9f */
+       255, 255, 255, 255, 255, 255, 255, 255, /* a0-a7 */
+       255, 255, 255, 255, 255, 255, 255, 255, /* a8-af */
+       255, 255, 255, 255, 255, 255, 255, 255, /* b0-b7 */
+       255, 255, 255, 255, 255, 255, 255, 255, /* b8-bf */
+       255, 255, 255, 255, 255, 255, 255, 255, /* c0-c7 */
+       255, 255, 255, 255, 255, 255, 255, 255, /* c8-cf */
+       255, 255, 255, 255, 255, 255, 255, 255, /* d0-d7 */
+       255, 255, 255, 255, 255, 255, 255, 255, /* d8-df */
+       255, 255, 255, 255, 255, 255, 255, 255, /* e0-e7 */
+       255, 255, 255, 255, 255, 255, 255, 255, /* e8-ef */
+       255, 255, 255, 255, 255, 255, 255, 255, /* f0-f7 */
+       255, 255, 255, 255, 255, 255, 255, 255, /* f8-ff */
+};
+
+/** Maximal possible size of the decoded data. */
+#define BASE64_MAX_DECODED_SIZE(_encoded_size) ((_encoded_size) / 4 * 3)
+
+#define PAD64 '='
+/**
+ * base64-decode a buffer.
+ *
+ * \param src The buffer to decode.
+ * \param encoded_size The special value -1 means: look for terminating zero byte.
+ * \param result Points to dynamically allocated target buffer on success.
+ * \param decoded_size Number of bytes written to \a result.
+ *
+ * Skips all whitespace anywhere. Converts characters, four at a time, starting
+ * at (or after) src from base - 64 numbers into three 8 bit bytes in the
+ * target area.
+ *
+ * It is OK to pass a \p NULL pointer as \a decoded_size. The result is
+ * terminated with a zero byte.
+ *
+ * \return Standard. The contents of result \a and \a decoded_size are
+ * undefined on failure.
+ */
+int base64_decode(char const *src, size_t encoded_size, char **result,
+               size_t *decoded_size)
+{
+       size_t i, j, state; /* source/target indices */
+       const char *end = src + encoded_size, *p;
+       unsigned char *target, uch;
+
+       if (encoded_size == (size_t)-1)
+               encoded_size = strlen(src);
+       target = para_malloc(BASE64_MAX_DECODED_SIZE(encoded_size) + 1);
+
+       for (
+               i = 0, j = 0, state = 0;
+               i < encoded_size && (uch = src[i]) != '\0';
+               i++
+       ) {
+               if (para_isspace(uch)) /* Skip whitespace anywhere. */
+                       continue;
+               if (uch == PAD64)
+                       break;
+               if (base64_tab[uch] == 255) /* A non-base64 character. */
+                       goto fail;
+               uch = base64_tab[uch];
+               switch (state) {
+               case 0:
+                       target[j] = uch << 2;
+                       break;
+               case 1:
+                       target[j] |= uch >> 4;
+                       j++;
+                       target[j] = (uch & 0x0f) << 4;
+                       break;
+               case 2:
+                       target[j] |= uch >> 2;
+                       j++;
+                       target[j] = (uch & 0x03) << 6;
+                       break;
+               case 3:
+                       target[j] |= uch;
+                       j++;
+                       break;
+               }
+               state = (state + 1) % 4;
+       }
+       p = (i < encoded_size)? src + i : NULL;
+       /*
+        * We are done decoding Base-64 chars.  Let's see if we ended
+        * on a byte boundary, and/or with erroneous trailing characters.
+        */
+       if (p && *p == PAD64) { /* We got a pad char. Skip it, get next. */
+               p++;
+               switch (state) {
+               case 0: /* Invalid = in first position */
+               case 1: /* Invalid = in second position */
+                       goto fail;
+
+               case 2: /* Valid, means one byte of info */
+                       /* Skip any number of spaces. */
+                       for (; p < end && *p != '\0'; p++)
+                               if (!para_isspace(*p))
+                                       break;
+                       /* Make sure there is another trailing = sign. */
+                       if (*p != PAD64)
+                               goto fail;
+                       /* Fall through to "single trailing =" case. */
+                       p++;
+
+               case 3: /* Valid, means two bytes of info */
+                       /*
+                        * We know this char is an =.  Is there anything but
+                        * whitespace after it?
+                        */
+                       for (; p < end && *p != '\0'; p++)
+                               if (!para_isspace(*p))
+                                       goto fail;
+                       /*
+                        * Now make sure for cases 2 and 3 that the "extra"
+                        * bits that slopped past the last full byte were
+                        * zeros.  If we don't check them, they become a
+                        * subliminal channel.
+                        */
+                       if (target[j] != 0)
+                               goto fail;
+               }
+       } else {
+               /*
+                * We ended by seeing the end of the string.  Make sure we
+                * have no partial bytes lying around.
+                */
+               if (state != 0)
+                       goto fail;
+       }
+       /* success */
+       target[j] = '\0'; /* just to be sure */
+       if (decoded_size)
+               *decoded_size = j;
+       *result = (char *)target;
+       return 1;
+fail:
+       free(target);
+       return -E_BASE64;
+}
+
+/**
+ * Decode a buffer using the uuencode Base64 algorithm.
+ *
+ * \param src The buffer to decode.
+ * \param encoded_size Number of input bytes in the source buffer.
+ * \param result Contains the decoded data on success.
+ * \param decoded_size Number of output bytes on success.
+ *
+ * This is just a simple wrapper for \ref base64_decode() which strips
+ * whitespace.
+ *
+ * \return The return value of the underlying call to \ref base64_decode().
+ *
+ * \sa uuencode(1), uudecode(1).
+ */
+int uudecode(char const *src, size_t encoded_size, char **result,
+               size_t *decoded_size)
+{
+       const char *end = src + encoded_size, *p;
+
+       /* skip whitespace and data */
+       for (p = src; p < end && (*p == ' ' || *p == '\t'); p++)
+               ;
+       for (; p < end && *p != '\0' && *p != ' ' && *p != '\t'; p++)
+               ;
+       /* and remove trailing whitespace because base64_decode needs this */
+       return base64_decode(src, p - src, result, decoded_size);
+}
diff --git a/base64.h b/base64.h
new file mode 100644 (file)
index 0000000..4bfaa99
--- /dev/null
+++ b/base64.h
@@ -0,0 +1,4 @@
+int uudecode(char const *src, size_t encoded_size, char **result,
+               size_t *decoded_size);
+int base64_decode(char const *src, size_t encoded_size, char **result,
+               size_t *decoded_size);
index f9115fd652962881c1e7548c3cbac55e724305df..8d4ce59a0eec619086b0e8234903efaed945ef37 100644 (file)
@@ -418,6 +418,7 @@ if test -n "$CRYPTOLIB" && test $HAVE_OSL = yes; then
                close_on_fork
                mm
                crypt_common
+               base64
                ipc
                dccp_send
                fd
@@ -480,6 +481,7 @@ if test -n "$CRYPTOLIB"; then
                client_common
                buffer_tree
                crypt_common
+               base64
                version
                ggo
        "
@@ -523,6 +525,7 @@ if test -n "$CRYPTOLIB"; then
                stat
                net
                crypt_common
+               base64
                sideband
                time
                grab_client
diff --git a/crypt.c b/crypt.c
index 610d2057947dba6757526127a317f8b87bf7eebc..f227eb39907691d24f0eae360c04027c1653be75 100644 (file)
--- a/crypt.c
+++ b/crypt.c
@@ -23,6 +23,7 @@
 #include "crypt.h"
 #include "fd.h"
 #include "crypt_backend.h"
+#include "base64.h"
 
 struct asymmetric_key {
        RSA *rsa;
@@ -158,7 +159,7 @@ int get_asymmetric_key(const char *key_file, int private,
        struct asymmetric_key *key = NULL;
        void *map = NULL;
        unsigned char *blob = NULL;
-       size_t map_size, blob_size, decoded_size;
+       size_t map_size, encoded_size, decoded_size;
        int ret, ret2;
        char *cp;
 
@@ -180,16 +181,11 @@ int get_asymmetric_key(const char *key_file, int private,
                goto out;
        }
        cp = map + ret;
+       encoded_size = map_size - ret;
        PARA_INFO_LOG("decoding public rsa-ssh key %s\n", key_file);
-       ret = -ERRNO_TO_PARA_ERROR(EOVERFLOW);
-       if (map_size > INT_MAX / 4)
-               goto out_unmap;
-       blob_size = 2 * map_size;
-       blob = para_malloc(blob_size);
-       ret = uudecode(cp, blob, blob_size);
+       ret = uudecode(cp, encoded_size, (char **)&blob, &decoded_size);
        if (ret < 0)
                goto out_unmap;
-       decoded_size = ret;
        ret = check_ssh_key_header(blob, decoded_size);
        if (ret < 0)
                goto out_unmap;
index 06c86d74b8c452bc761dafe75ad6ee792a1f15d9..f9a69d9490057382126e4efe04a15bfda9025419 100644 (file)
@@ -13,7 +13,5 @@
 
 size_t is_ssh_rsa_key(char *data, size_t size);
 uint32_t read_ssh_u32(const void *vp);
-int uudecode(const char *src, unsigned char *target, size_t targsize);
 int check_ssh_key_header(const unsigned char *blob, int blen);
 int check_key_file(const char *file, bool private_key);
-int base64_decode(char const *src, unsigned char *target, size_t targsize);
index 022692adc52ce1cb8fe4ec925e93362e89b4b754..b39ee5e4cb6fce2b28a3b87034350f321d7cd75d 100644 (file)
@@ -45,166 +45,6 @@ size_t is_ssh_rsa_key(char *data, size_t size)
        return cp - data;
 }
 
-/*
- * This base64/uudecode stuff below is taken from openssh-5.2p1, Copyright (c)
- * 1996 by Internet Software Consortium.  Portions Copyright (c) 1995 by
- * International Business Machines, Inc.
- */
-
-static const char Base64[] =
-       "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
-static const char Pad64 = '=';
-
-/**
- * base64-decode a buffer.
- *
- * \param src The buffer to decode.
- * \param target Result is stored here.
- * \param targsize Number of bytes of \a target.
- *
- * Skips all whitespace anywhere. Converts characters, four at a time, starting
- * at (or after) src from base - 64 numbers into three 8 bit bytes in the
- * target area.
- *
- * \return The number of data bytes stored at the target, -E_BASE64 on errors.
- */
-int base64_decode(char const *src, unsigned char *target, size_t targsize)
-{
-       unsigned int tarindex, state;
-       int ch;
-       char *pos;
-
-       state = 0;
-       tarindex = 0;
-
-       while ((ch = *src++) != '\0') {
-               if (para_isspace(ch)) /* Skip whitespace anywhere. */
-                       continue;
-
-               if (ch == Pad64)
-                       break;
-
-               pos = strchr(Base64, ch);
-               if (pos == NULL) /* A non-base64 character. */
-                       return -E_BASE64;
-
-               switch (state) {
-               case 0:
-                       if (tarindex >= targsize)
-                               return -E_BASE64;
-                       target[tarindex] = (pos - Base64) << 2;
-                       state = 1;
-                       break;
-               case 1:
-                       if (tarindex + 1 >= targsize)
-                               return -E_BASE64;
-                       target[tarindex] |= (pos - Base64) >> 4;
-                       target[tarindex + 1] = ((pos - Base64) & 0x0f) << 4;
-                       tarindex++;
-                       state = 2;
-                       break;
-               case 2:
-                       if (tarindex + 1 >= targsize)
-                               return -E_BASE64;
-                       target[tarindex] |= (pos - Base64) >> 2;
-                       target[tarindex + 1] = ((pos - Base64) & 0x03) << 6;
-                       tarindex++;
-                       state = 3;
-                       break;
-               case 3:
-                       if (tarindex >= targsize)
-                               return -E_BASE64;
-                       target[tarindex] |= pos - Base64;
-                       tarindex++;
-                       state = 0;
-                       break;
-               }
-       }
-
-       /*
-        * We are done decoding Base-64 chars.  Let's see if we ended
-        * on a byte boundary, and/or with erroneous trailing characters.
-        */
-
-       if (ch == Pad64) {              /* We got a pad char. */
-               ch = *src++;            /* Skip it, get next. */
-               switch (state) {
-               case 0:         /* Invalid = in first position */
-               case 1:         /* Invalid = in second position */
-                       return -E_BASE64;
-
-               case 2:         /* Valid, means one byte of info */
-                       /* Skip any number of spaces. */
-                       for (; ch != '\0'; ch = *src++)
-                               if (!isspace(ch))
-                                       break;
-                       /* Make sure there is another trailing = sign. */
-                       if (ch != Pad64)
-                               return -E_BASE64;
-                       ch = *src++;            /* Skip the = */
-                       /* Fall through to "single trailing =" case. */
-                       /* FALLTHROUGH */
-
-               case 3:         /* Valid, means two bytes of info */
-                       /*
-                        * We know this char is an =.  Is there anything but
-                        * whitespace after it?
-                        */
-                       for (; ch != '\0'; ch = *src++)
-                               if (!isspace(ch))
-                                       return -E_BASE64;
-
-                       /*
-                        * Now make sure for cases 2 and 3 that the "extra"
-                        * bits that slopped past the last full byte were
-                        * zeros.  If we don't check them, they become a
-                        * subliminal channel.
-                        */
-                       if (target[tarindex] != 0)
-                               return -E_BASE64;
-               }
-       } else {
-               /*
-                * We ended by seeing the end of the string.  Make sure we
-                * have no partial bytes lying around.
-                */
-               if (state != 0)
-                       return -E_BASE64;
-       }
-
-       return tarindex;
-}
-
-/**
- * uudecode a buffer.
- *
- * \param src The buffer to decode.
- * \param target Result buffer.
- * \param targsize The length of \a target in bytes.
- *
- * This is just a simple wrapper for base64_decode() which strips whitespace.
- *
- * \return The return value of the underlying call to base64_decode().
- */
-int uudecode(const char *src, unsigned char *target, size_t targsize)
-{
-       int len;
-       char *encoded, *p;
-
-       /* copy the 'readonly' source */
-       encoded = para_strdup(src);
-       /* skip whitespace and data */
-       for (p = encoded; *p == ' ' || *p == '\t'; p++)
-               ;
-       for (; *p != '\0' && *p != ' ' && *p != '\t'; p++)
-               ;
-       /* and remove trailing whitespace because base64_decode needs this */
-       *p = '\0';
-       len = base64_decode(encoded, target, targsize);
-       free(encoded);
-       return len;
-}
-
 /**
  * Read a 4-byte number from a buffer in big-endian format.
  *
diff --git a/error.h b/error.h
index 3fda5787774ce739754a58d74706c3081f1e9da5..ff85c8d18455e356f6e220c642b776570db86111 100644 (file)
--- a/error.h
+++ b/error.h
@@ -439,9 +439,10 @@ extern const char **para_errlist[];
 
 #define CRYPT_COMMON_ERRORS \
        PARA_ERROR(SSH_KEY_HEADER, "ssh key header not found"), \
-       PARA_ERROR(BASE64, "failed to base64-decode ssh public key"), \
        PARA_ERROR(KEY_PERM, "unprotected private key"), \
 
+#define BASE64_ERRORS \
+       PARA_ERROR(BASE64, "base64 decode error"), \
 
 #define CRYPT_ERRORS \
        PARA_ERROR(PRIVATE_KEY, "can not read private key"), \
index 3c6c1ad1d74c234f56f289a46fad98cede2a5140..289748e84d16dbca133b9280dc2027e5d9c2ec67 100644 (file)
--- a/gcrypt.c
+++ b/gcrypt.c
@@ -15,6 +15,7 @@
 #include "crypt.h"
 #include "crypt_backend.h"
 #include "fd.h"
+#include "base64.h"
 
 //#define GCRYPT_DEBUG 1
 
@@ -239,12 +240,11 @@ static int decode_key(const char *key_file, const char *header_str,
                key[j++] = begin[i];
        }
        key[j] = '\0';
-       blob_size = key_size * 2;
-       blob = para_malloc(blob_size);
-       ret = base64_decode(key, blob, blob_size);
+       ret = base64_decode(key, j, (char **)&blob, &blob_size);
        free(key);
        if (ret < 0)
                goto free_unmap;
+       ret = blob_size;
        goto unmap;
 free_unmap:
        free(blob);
@@ -606,13 +606,9 @@ static int get_ssh_public_key(unsigned char *data, int size, gcry_sexp_t *result
        gcry_mpi_t e = NULL, n = NULL;
 
        PARA_DEBUG_LOG("decoding %d byte public rsa-ssh key\n", size);
-       if (size > INT_MAX / 4)
-               return -ERRNO_TO_PARA_ERROR(EOVERFLOW);
-       blob = para_malloc(2 * size);
-       ret = uudecode((char *)data, blob, 2 * size);
+       ret = uudecode((char *)data, size, (char **)&blob, &decoded_size);
        if (ret < 0)
                goto free_blob;
-       decoded_size = ret;
        end = blob + decoded_size;
        dump_buffer("decoded key", blob, decoded_size);
        ret = check_ssh_key_header(blob, decoded_size);