From: Andre Noll Date: Wed, 9 Feb 2011 17:18:17 +0000 (+0100) Subject: Speed up the oggdec filter and avoid wasting tons of memory. X-Git-Tag: v0.4.6~27^2 X-Git-Url: http://git.tuebingen.mpg.de/?p=paraslash.git;a=commitdiff_plain;h=f44d27b634fe4a26efc38de75e8b84524d156c4b;ds=sidebyside Speed up the oggdec filter and avoid wasting tons of memory. Calls to ov_read() appear to return at most 4K, no matter how much data was requested. We allocated 64K per output chunk, so 60K of that space is wasted. On the other hand we need large output buffers in order to not sacrifice performance when para_filter decodes to stdout. Fix this flaw by increasing the oggdec output buffer size from 64K to 640K, calling ov_read() until the output buffer is full or there is nothing left to read, and then reallocating the buffer to the number of bytes actually read. Assuming CD audio, the 640K output buffer size roughly matches the decoded size of the 32K input buffer used by the stdin task, so each input buffer corresponds roughly to one output buffer. The patched version performs almost identically to the oggdec reference implementation while previous versions were up to a factor of 2 slower. 
--- diff --git a/oggdec_filter.c b/oggdec_filter.c index 94c284a7..44d299fe 100644 --- a/oggdec_filter.c +++ b/oggdec_filter.c @@ -113,7 +113,7 @@ static void ogg_close(struct filter_node *fn) fn->private_data = NULL; } -#define OGGDEC_OUTPUT_CHUNK_SIZE (64 * 1024) +#define OGGDEC_OUTPUT_CHUNK_SIZE (640 * 1024) static int oggdec_execute(struct btr_node *btrn, const char *cmd, char **result) { @@ -200,6 +200,8 @@ static void ogg_post_select(__a_unused struct sched *s, struct task *t) struct private_oggdec_data *pod = fn->private_data; struct btr_node *btrn = fn->btrn; int ret, ns; + char *out; + ssize_t read_ret, have; pod->converted = 0; t->error = 0; @@ -214,27 +216,36 @@ static void ogg_post_select(__a_unused struct sched *s, struct task *t) if (ret <= 0) goto out; } - for (;;) { - char *out = para_malloc(OGGDEC_OUTPUT_CHUNK_SIZE); - ssize_t read_ret = ov_read(pod->vf, out, OGGDEC_OUTPUT_CHUNK_SIZE, + out = para_malloc(OGGDEC_OUTPUT_CHUNK_SIZE); + for (have = 0;;) { + read_ret = ov_read(pod->vf, out + have, + OGGDEC_OUTPUT_CHUNK_SIZE - have, ENDIAN, 2 /* 16 bit */, 1 /* signed */, NULL); btr_consume(btrn, pod->converted); pod->converted = 0; if (read_ret <= 0) - free(out); - ret = ns; - if (read_ret == OV_HOLE) /* avoid buffer underruns */ - fn->min_iqs = 9000; - if (read_ret == 0 || read_ret == OV_HOLE) - goto out; - ret = -E_OGGDEC_BADLINK; - if (read_ret < 0) - goto out; - btr_add_output(out, read_ret, btrn); + break; + have += read_ret; + if (have >= OGGDEC_OUTPUT_CHUNK_SIZE) + break; + } + if (have == 0) + free(out); + else if (have < OGGDEC_OUTPUT_CHUNK_SIZE) + out = para_realloc(out, have); + if (have > 0) { + btr_add_output(out, have, btrn); fn->min_iqs = 0; - if (btr_get_output_queue_size(btrn) > 128 * 1024) - return; /* enough data for the moment */ } + ret = ns; + if (read_ret == OV_HOLE) /* avoid buffer underruns */ + fn->min_iqs = 9000; + if (read_ret == 0 || read_ret == OV_HOLE) + goto out; + ret = -E_OGGDEC_BADLINK; + if (read_ret < 0) + goto out; 
+ ret = 0; out: if (ret < 0) { t->error = ret;