From: Andre Noll <maan@tuebingen.mpg.de>
Date: Tue, 20 Dec 2016 14:40:09 +0000 (+0100)
Subject: Merge branch 'refs/heads/t/base64'
X-Git-Tag: v0.5.7~20
X-Git-Url: http://git.tuebingen.mpg.de/?a=commitdiff_plain;h=78cc9c57ad9b1cc389956957030d37f71cb60a07;hp=904e302f0b64887f18c9e2fec7b0bb405675ad22;p=paraslash.git

Merge branch 'refs/heads/t/base64'

A couple of patches which move the base64 code to a separate file,
and improve on it. Was cooking for several months.

* refs/heads/t/base64:
  base64: Speed up decoder by using a table.
  base64: Use para_isspace() everywhere.
  base64: Trivial whitespace fixes.
  base64: Replace Pad64 variable by macro.
  base64: Remove unnecessary overflow checks.
  base64: Saner semantics for base64_decode() and uudecode().
  Move base64 implementation to own file.
---

diff --git a/NEWS.md b/NEWS.md
index afc7f105..b0862d5f 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -4,7 +4,7 @@ NEWS
 ------------------------------------------
 0.5.7 (to be announced) "semantic density"
 ------------------------------------------
-
+- Speedup of the base64 decoder.
 - One of the two source browsers has been removed from the web pages.
   The doxygen API reference still contains an HTML version of each
   source file.
diff --git a/base64.c b/base64.c
new file mode 100644
index 00000000..7b8fe292
--- /dev/null
+++ b/base64.c
@@ -0,0 +1,205 @@
+/*
+ * The code in this file was taken from openssh-5.2p1, Copyright (c) 1996 by
+ * Internet Software Consortium.  Portions Copyright (c) 1995 by International
+ * Business Machines, Inc.
+ */
+
+/** \file base64.c Uudecode and base64decode implementation. */
+
+#include <regex.h>
+
+#include "para.h"
+#include "error.h"
+#include "base64.h"
+#include "string.h"
+
+static const char Base64[] =
+	"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
+static const unsigned char base64_tab[256] = { /* 255: not a base64 char */
+	255, 255, 255, 255, 255, 255, 255, 255, /* 00-07 */
+	255, 255, 255, 255, 255, 255, 255, 255, /* 08-0f */
+	255, 255, 255, 255, 255, 255, 255, 255, /* 10-17 */
+	255, 255, 255, 255, 255, 255, 255, 255, /* 18-1f */
+	255, 255, 255, 255, 255, 255, 255, 255, /* 20-27 */
+	255, 255, 255,  62, 255, 255, 255,  63, /* 28-2f */
+	52 ,  53,  54,  55,  56,  57,  58,  59, /* 30-37 */
+	60 ,  61, 255, 255, 255, 255, 255, 255, /* 38-3f */
+	255,   0,   1,   2,   3,   4,   5,   6, /* 40-47 */
+	7  ,   8,   9,  10,  11,  12,  13,  14, /* 48-4f */
+	15 ,  16,  17,  18,  19,  20,  21,  22, /* 50-57 */
+	23 ,  24,  25, 255, 255, 255, 255, 255, /* 58-5f */
+	255,  26,  27,  28,  29,  30,  31,  32, /* 60-67 */
+	33 ,  34,  35,  36,  37,  38,  39,  40, /* 68-6f */
+	41 ,  42,  43,  44,  45,  46,  47,  48, /* 70-77 */
+	49 ,  50,  51, 255, 255, 255, 255, 255, /* 78-7f */
+	255, 255, 255, 255, 255, 255, 255, 255, /* 80-87 */
+	255, 255, 255, 255, 255, 255, 255, 255, /* 88-8f */
+	255, 255, 255, 255, 255, 255, 255, 255, /* 90-97 */
+	255, 255, 255, 255, 255, 255, 255, 255, /* 98-9f */
+	255, 255, 255, 255, 255, 255, 255, 255, /* a0-a7 */
+	255, 255, 255, 255, 255, 255, 255, 255, /* a8-af */
+	255, 255, 255, 255, 255, 255, 255, 255, /* b0-b7 */
+	255, 255, 255, 255, 255, 255, 255, 255, /* b8-bf */
+	255, 255, 255, 255, 255, 255, 255, 255, /* c0-c7 */
+	255, 255, 255, 255, 255, 255, 255, 255, /* c8-cf */
+	255, 255, 255, 255, 255, 255, 255, 255, /* d0-d7 */
+	255, 255, 255, 255, 255, 255, 255, 255, /* d8-df */
+	255, 255, 255, 255, 255, 255, 255, 255, /* e0-e7 */
+	255, 255, 255, 255, 255, 255, 255, 255, /* e8-ef */
+	255, 255, 255, 255, 255, 255, 255, 255, /* f0-f7 */
+	255, 255, 255, 255, 255, 255, 255, 255, /* f8-ff */
+};
+
+/** Maximal possible size of the decoded data. */
+#define BASE64_MAX_DECODED_SIZE(_encoded_size) ((_encoded_size) / 4 * 3)
+
+#define PAD64 '='
+/**
+ * base64-decode a buffer.
+ *
+ * \param src The buffer to decode.
+ * \param encoded_size The special value -1 means: look for terminating zero byte.
+ * \param result Points to dynamically allocated target buffer on success.
+ * \param decoded_size Number of bytes written to \a result.
+ *
+ * Skips all whitespace anywhere. Converts characters, four at a time, starting
+ * at (or after) src from base - 64 numbers into three 8 bit bytes in the
+ * target area.
+ *
+ * It is OK to pass a \p NULL pointer as \a decoded_size. The result is
+ * terminated with a zero byte.
+ *
+ * \return Standard. The contents of \a result and \a decoded_size are
+ * undefined on failure.
+ */
+int base64_decode(char const *src, size_t encoded_size, char **result,
+		size_t *decoded_size)
+{
+	size_t i, j, state; /* source/target indices */
+	const char *end, *p;
+	unsigned char *target, uch;
+
+	if (encoded_size == (size_t)-1)
+		encoded_size = strlen(src);
+	end = src + encoded_size; /* only valid once the -1 case is resolved */
+	/* +3 covers the bytes of an incomplete final group and the zero byte. */
+	target = para_malloc(BASE64_MAX_DECODED_SIZE(encoded_size) + 3);
+
+	for (i = 0, j = 0, state = 0;
+			i < encoded_size && (uch = src[i]) != '\0'; i++) {
+		if (para_isspace(uch)) /* Skip whitespace anywhere. */
+			continue;
+		if (uch == PAD64)
+			break;
+		if (base64_tab[uch] == 255) /* A non-base64 character. */
+			goto fail;
+		uch = base64_tab[uch];
+		switch (state) {
+		case 0:
+			target[j] = uch << 2;
+			break;
+		case 1:
+			target[j] |= uch >> 4;
+			j++;
+			target[j] = (uch & 0x0f) << 4;
+			break;
+		case 2:
+			target[j] |= uch >> 2;
+			j++;
+			target[j] = (uch & 0x03) << 6;
+			break;
+		case 3:
+			target[j] |= uch;
+			j++;
+			break;
+		}
+		state = (state + 1) % 4;
+	}
+	p = (i < encoded_size)? src + i : NULL;
+	/*
+	 * We are done decoding Base-64 chars.  Let's see if we ended
+	 * on a byte boundary, and/or with erroneous trailing characters.
+	 */
+	if (p && *p == PAD64) { /* We got a pad char. Skip it, get next. */
+		p++;
+		switch (state) {
+		case 0: /* Invalid = in first position */
+		case 1: /* Invalid = in second position */
+			goto fail;
+
+		case 2: /* Valid, means one byte of info */
+			/* Skip any number of spaces. */
+			for (; p < end && *p != '\0'; p++)
+				if (!para_isspace(*p))
+					break;
+			/* Make sure there is another trailing = sign. */
+			/* src need not be zero-terminated: check the bound first. */
+			if (p >= end || *p != PAD64)
+				goto fail;
+			/* Fall through to "single trailing =" case. */
+			p++;
+
+		case 3: /* Valid, means two bytes of info */
+			/*
+			 * We know this char is an =.  Is there anything but
+			 * whitespace after it?
+			 */
+			for (; p < end && *p != '\0'; p++)
+				if (!para_isspace(*p))
+					goto fail;
+			/*
+			 * Now make sure for cases 2 and 3 that the "extra"
+			 * bits that slopped past the last full byte were
+			 * zeros.  If we don't check them, they become a
+			 * subliminal channel.
+			 */
+			if (target[j] != 0)
+				goto fail;
+		}
+	} else {
+		/*
+		 * We ended by seeing the end of the string.  Make sure we
+		 * have no partial bytes lying around.
+		 */
+		if (state != 0)
+			goto fail;
+	}
+	/* success */
+	target[j] = '\0'; /* just to be sure */
+	if (decoded_size)
+		*decoded_size = j;
+	*result = (char *)target;
+	return 1;
+fail:
+	free(target);
+	return -E_BASE64;
+}
+
+/**
+ * Base64-decode the leading blank-delimited token of a buffer.
+ *
+ * \param src The buffer to decode.
+ * \param encoded_size Number of input bytes in the source buffer.
+ * \param result Contains the decoded data on success.
+ * \param decoded_size Number of output bytes on success.
+ *
+ * Only the leading spaces/tabs and the run of bytes up to the next space, tab
+ * or zero byte are handed to \ref base64_decode(); the rest is ignored.
+ *
+ * \return The return value of the underlying call to \ref base64_decode().
+ *
+ * \sa uuencode(1), uudecode(1).
+ */
+int uudecode(char const *src, size_t encoded_size, char **result,
+		size_t *decoded_size)
+{
+	const char *cur = src, *limit = src + encoded_size;
+
+	/* Step over leading blanks, then over the encoded data itself. */
+	while (cur < limit && (*cur == ' ' || *cur == '\t'))
+		cur++;
+	while (cur < limit && *cur != '\0' && *cur != ' ' && *cur != '\t')
+		cur++;
+	/* Cut off at the first blank after the data, base64_decode needs this. */
+	return base64_decode(src, cur - src, result, decoded_size);
+}
diff --git a/base64.h b/base64.h
new file mode 100644
index 00000000..4bfaa99d
--- /dev/null
+++ b/base64.h
@@ -0,0 +1,4 @@
+int uudecode(char const *src, size_t encoded_size, char **result,
+		size_t *decoded_size);
+int base64_decode(char const *src, size_t encoded_size, char **result,
+		size_t *decoded_size);
diff --git a/configure.ac b/configure.ac
index f9115fd6..8d4ce59a 100644
--- a/configure.ac
+++ b/configure.ac
@@ -418,6 +418,7 @@ if test -n "$CRYPTOLIB" && test $HAVE_OSL = yes; then
 		close_on_fork
 		mm
 		crypt_common
+		base64
 		ipc
 		dccp_send
 		fd
@@ -480,6 +481,7 @@ if test -n "$CRYPTOLIB"; then
 		client_common
 		buffer_tree
 		crypt_common
+		base64
 		version
 		ggo
 	"
@@ -523,6 +525,7 @@ if test -n "$CRYPTOLIB"; then
 		stat
 		net
 		crypt_common
+		base64
 		sideband
 		time
 		grab_client
diff --git a/crypt.c b/crypt.c
index 610d2057..f227eb39 100644
--- a/crypt.c
+++ b/crypt.c
@@ -23,6 +23,7 @@
 #include "crypt.h"
 #include "fd.h"
 #include "crypt_backend.h"
+#include "base64.h"
 
 struct asymmetric_key {
 	RSA *rsa;
@@ -158,7 +159,7 @@ int get_asymmetric_key(const char *key_file, int private,
 	struct asymmetric_key *key = NULL;
 	void *map = NULL;
 	unsigned char *blob = NULL;
-	size_t map_size, blob_size, decoded_size;
+	size_t map_size, encoded_size, decoded_size;
 	int ret, ret2;
 	char *cp;
 
@@ -180,16 +181,11 @@ int get_asymmetric_key(const char *key_file, int private,
 		goto out;
 	}
 	cp = map + ret;
+	encoded_size = map_size - ret;
 	PARA_INFO_LOG("decoding public rsa-ssh key %s\n", key_file);
-	ret = -ERRNO_TO_PARA_ERROR(EOVERFLOW);
-	if (map_size > INT_MAX / 4)
-		goto out_unmap;
-	blob_size = 2 * map_size;
-	blob = para_malloc(blob_size);
-	ret = uudecode(cp, blob, blob_size);
+	ret = uudecode(cp, encoded_size, (char **)&blob, &decoded_size);
 	if (ret < 0)
 		goto out_unmap;
-	decoded_size = ret;
 	ret = check_ssh_key_header(blob, decoded_size);
 	if (ret < 0)
 		goto out_unmap;
diff --git a/crypt_backend.h b/crypt_backend.h
index 06c86d74..f9a69d94 100644
--- a/crypt_backend.h
+++ b/crypt_backend.h
@@ -13,7 +13,5 @@
 
 size_t is_ssh_rsa_key(char *data, size_t size);
 uint32_t read_ssh_u32(const void *vp);
-int uudecode(const char *src, unsigned char *target, size_t targsize);
 int check_ssh_key_header(const unsigned char *blob, int blen);
 int check_key_file(const char *file, bool private_key);
-int base64_decode(char const *src, unsigned char *target, size_t targsize);
diff --git a/crypt_common.c b/crypt_common.c
index 022692ad..b39ee5e4 100644
--- a/crypt_common.c
+++ b/crypt_common.c
@@ -45,166 +45,6 @@ size_t is_ssh_rsa_key(char *data, size_t size)
 	return cp - data;
 }
 
-/*
- * This base64/uudecode stuff below is taken from openssh-5.2p1, Copyright (c)
- * 1996 by Internet Software Consortium.  Portions Copyright (c) 1995 by
- * International Business Machines, Inc.
- */
-
-static const char Base64[] =
-	"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
-static const char Pad64 = '=';
-
-/**
- * base64-decode a buffer.
- *
- * \param src The buffer to decode.
- * \param target Result is stored here.
- * \param targsize Number of bytes of \a target.
- *
- * Skips all whitespace anywhere. Converts characters, four at a time, starting
- * at (or after) src from base - 64 numbers into three 8 bit bytes in the
- * target area.
- *
- * \return The number of data bytes stored at the target, -E_BASE64 on errors.
- */
-int base64_decode(char const *src, unsigned char *target, size_t targsize)
-{
-	unsigned int tarindex, state;
-	int ch;
-	char *pos;
-
-	state = 0;
-	tarindex = 0;
-
-	while ((ch = *src++) != '\0') {
-		if (para_isspace(ch)) /* Skip whitespace anywhere. */
-			continue;
-
-		if (ch == Pad64)
-			break;
-
-		pos = strchr(Base64, ch);
-		if (pos == NULL) /* A non-base64 character. */
-			return -E_BASE64;
-
-		switch (state) {
-		case 0:
-			if (tarindex >= targsize)
-				return -E_BASE64;
-			target[tarindex] = (pos - Base64) << 2;
-			state = 1;
-			break;
-		case 1:
-			if (tarindex + 1 >= targsize)
-				return -E_BASE64;
-			target[tarindex] |= (pos - Base64) >> 4;
-			target[tarindex + 1] = ((pos - Base64) & 0x0f) << 4;
-			tarindex++;
-			state = 2;
-			break;
-		case 2:
-			if (tarindex + 1 >= targsize)
-				return -E_BASE64;
-			target[tarindex] |= (pos - Base64) >> 2;
-			target[tarindex + 1] = ((pos - Base64) & 0x03) << 6;
-			tarindex++;
-			state = 3;
-			break;
-		case 3:
-			if (tarindex >= targsize)
-				return -E_BASE64;
-			target[tarindex] |= pos - Base64;
-			tarindex++;
-			state = 0;
-			break;
-		}
-	}
-
-	/*
-	 * We are done decoding Base-64 chars.  Let's see if we ended
-	 * on a byte boundary, and/or with erroneous trailing characters.
-	 */
-
-	if (ch == Pad64) {		/* We got a pad char. */
-		ch = *src++;		/* Skip it, get next. */
-		switch (state) {
-		case 0:		/* Invalid = in first position */
-		case 1:		/* Invalid = in second position */
-			return -E_BASE64;
-
-		case 2:		/* Valid, means one byte of info */
-			/* Skip any number of spaces. */
-			for (; ch != '\0'; ch = *src++)
-				if (!isspace(ch))
-					break;
-			/* Make sure there is another trailing = sign. */
-			if (ch != Pad64)
-				return -E_BASE64;
-			ch = *src++;		/* Skip the = */
-			/* Fall through to "single trailing =" case. */
-			/* FALLTHROUGH */
-
-		case 3:		/* Valid, means two bytes of info */
-			/*
-			 * We know this char is an =.  Is there anything but
-			 * whitespace after it?
-			 */
-			for (; ch != '\0'; ch = *src++)
-				if (!isspace(ch))
-					return -E_BASE64;
-
-			/*
-			 * Now make sure for cases 2 and 3 that the "extra"
-			 * bits that slopped past the last full byte were
-			 * zeros.  If we don't check them, they become a
-			 * subliminal channel.
-			 */
-			if (target[tarindex] != 0)
-				return -E_BASE64;
-		}
-	} else {
-		/*
-		 * We ended by seeing the end of the string.  Make sure we
-		 * have no partial bytes lying around.
-		 */
-		if (state != 0)
-			return -E_BASE64;
-	}
-
-	return tarindex;
-}
-
-/**
- * uudecode a buffer.
- *
- * \param src The buffer to decode.
- * \param target Result buffer.
- * \param targsize The length of \a target in bytes.
- *
- * This is just a simple wrapper for base64_decode() which strips whitespace.
- *
- * \return The return value of the underlying call to base64_decode().
- */
-int uudecode(const char *src, unsigned char *target, size_t targsize)
-{
-	int len;
-	char *encoded, *p;
-
-	/* copy the 'readonly' source */
-	encoded = para_strdup(src);
-	/* skip whitespace and data */
-	for (p = encoded; *p == ' ' || *p == '\t'; p++)
-		;
-	for (; *p != '\0' && *p != ' ' && *p != '\t'; p++)
-		;
-	/* and remove trailing whitespace because base64_decode needs this */
-	*p = '\0';
-	len = base64_decode(encoded, target, targsize);
-	free(encoded);
-	return len;
-}
-
 /**
  * Read a 4-byte number from a buffer in big-endian format.
  *
diff --git a/error.h b/error.h
index 3fda5787..ff85c8d1 100644
--- a/error.h
+++ b/error.h
@@ -439,9 +439,10 @@ extern const char **para_errlist[];
 
 #define CRYPT_COMMON_ERRORS \
 	PARA_ERROR(SSH_KEY_HEADER, "ssh key header not found"), \
-	PARA_ERROR(BASE64, "failed to base64-decode ssh public key"), \
 	PARA_ERROR(KEY_PERM, "unprotected private key"), \
 
+#define BASE64_ERRORS \
+	PARA_ERROR(BASE64, "base64 decode error"), \
 
 #define CRYPT_ERRORS \
 	PARA_ERROR(PRIVATE_KEY, "can not read private key"), \
diff --git a/gcrypt.c b/gcrypt.c
index 3c6c1ad1..289748e8 100644
--- a/gcrypt.c
+++ b/gcrypt.c
@@ -15,6 +15,7 @@
 #include "crypt.h"
 #include "crypt_backend.h"
 #include "fd.h"
+#include "base64.h"
 
 //#define GCRYPT_DEBUG 1
 
@@ -239,12 +240,11 @@ static int decode_key(const char *key_file, const char *header_str,
 		key[j++] = begin[i];
 	}
 	key[j] = '\0';
-	blob_size = key_size * 2;
-	blob = para_malloc(blob_size);
-	ret = base64_decode(key, blob, blob_size);
+	ret = base64_decode(key, j, (char **)&blob, &blob_size);
 	free(key);
 	if (ret < 0)
 		goto free_unmap;
+	ret = blob_size;
 	goto unmap;
 free_unmap:
 	free(blob);
@@ -606,13 +606,9 @@ static int get_ssh_public_key(unsigned char *data, int size, gcry_sexp_t *result
 	gcry_mpi_t e = NULL, n = NULL;
 
 	PARA_DEBUG_LOG("decoding %d byte public rsa-ssh key\n", size);
-	if (size > INT_MAX / 4)
-		return -ERRNO_TO_PARA_ERROR(EOVERFLOW);
-	blob = para_malloc(2 * size);
-	ret = uudecode((char *)data, blob, 2 * size);
+	ret = uudecode((char *)data, size, (char **)&blob, &decoded_size);
 	if (ret < 0)
 		goto free_blob;
-	decoded_size = ret;
 	end = blob + decoded_size;
 	dump_buffer("decoded key", blob, decoded_size);
 	ret = check_ssh_key_header(blob, decoded_size);