server: Store max chunk size in database.
[paraslash.git] / wma_afh.c
1 /*
2 * Copyright (C) 2009 Andre Noll <maan@tuebingen.mpg.de>
3 *
4 * Licensed under the GPL v2. For licencing details see COPYING.
5 */
6
7 /** \file wma_afh.c The audio format handler for WMA files. */
8
9 #include <sys/types.h>
10 #include <regex.h>
11 #include <iconv.h>
12
13 #include "para.h"
14 #include "error.h"
15 #include "afh.h"
16 #include "portable_io.h"
17 #include "string.h"
18 #include "wma.h"
19 #include "fd.h"
20
/*
 * Iterate over the fixed-size packets of a buffer.
 *
 * _f is set to _buf and advanced by _ps bytes after each iteration. The loop
 * continues as long as _f + _ps is strictly smaller than _buf + _size, so a
 * packet which ends exactly at the end of the buffer is not visited.
 */
#define FOR_EACH_FRAME(_f, _buf, _size, _ps) for (_f = (_buf); \
	_f + (_ps) < (_buf) + (_size); \
	_f += (_ps))
24
25 /*
26 * Must be called on a frame boundary, e.g. start + header_len.
27 * \return Frame count, superframe count via *num_superframes.
28 */
29 static int count_frames(const char *buf, int buf_size, uint32_t packet_size,
30 int *num_superframes)
31 {
32 int fc = 0, sfc = 0; /* frame count, superframe count */
33 const uint8_t *p;
34
35
36 FOR_EACH_FRAME(p, (uint8_t *)buf, buf_size, packet_size) {
37 fc += p[WMA_FRAME_SKIP] & 0x0f;
38 sfc++;
39 }
40 PARA_INFO_LOG("%d frames, %d superframes\n", fc, sfc);
41 if (num_superframes)
42 *num_superframes = sfc;
43 return fc;
44 }
45
46 /*
47 * put_utf8() and get_str16() below are based on macros in libavutil/common.h
48 * of the mplayer source code, copyright (c) 2006 Michael Niedermayer
49 * <michaelni@gmx.at>.
50 */
51
/*
 * Encode a 32-bit Unicode character as UTF-8.
 *
 * Values below 0x80 are stored as a single byte. For all other values the
 * length of the sequence is derived from wma_log2(val): one lead byte is
 * followed by continuation bytes which carry six payload bits each. Up to 4
 * bytes are written for values in the valid UTF-8 range and up to 7 bytes in
 * the general case.
 *
 * \param val The character to convert.
 * \param result Where the converted UTF-8 bytes are written.
 *
 * \return The number of bytes written to result.
 */
static int put_utf8(uint32_t val, char *result)
{
	char *dst = result;
	int num_bytes, bits_left;

	if (val < 0x80) {
		result[0] = val;
		return 1;
	}
	num_bytes = (wma_log2(val) + 4) / 5;
	bits_left = (num_bytes - 1) * 6;
	/* lead byte: length marker in the high bits, then the top payload bits */
	*dst++ = (256 - (256 >> num_bytes)) | (val >> bits_left);
	/* continuation bytes, most significant six bits first */
	for (bits_left -= 6; bits_left >= 0; bits_left -= 6)
		*dst++ = 0x80 | ((val >> bits_left) & 0x3f);
	return dst - result;
}
80
/*
 * Convert a string of 16-bit characters to a null-terminated UTF-8 string.
 *
 * The 16-bit code units are fetched with read_u16(). Conversion stops at the
 * first zero code unit or after len / 2 code units, whichever comes first. A
 * high surrogate which is followed by a low surrogate is combined with it to
 * a single code point so that valid UTF-8 is produced for characters outside
 * of the basic multilingual plane.
 *
 * \param in The buffer which contains the 16-bit characters.
 * \param len Size of the input buffer in bytes.
 *
 * \return A pointer to an allocated buffer which must be freed by the caller,
 * or NULL if len is smaller than two.
 */
static char *get_str16(const char *in, int len)
{
	const char *p = in;
	int out_size = 0, out_len = 0;
	char *out = NULL;

	/* Nothing to convert. This also rejects negative lengths. */
	if (len < 2)
		return NULL;
	len /= 2; /* number of 16-bit code units */
	while (len--) {
		uint32_t x;

		/* make sure one converted character plus the terminator fit */
		if (out_len + 7 + 1 >= out_size) {
			out_size = 2 * out_size + 50;
			out = para_realloc(out, out_size);
		}
		x = read_u16(p);
		p += 2;
		if (x >= 0xd800 && x <= 0xdbff && len > 0) {
			uint32_t lo = read_u16(p);

			if (lo >= 0xdc00 && lo <= 0xdfff) {
				/* surrogate pair: consume the second half as well */
				x = 0x10000 + ((x - 0xd800) << 10)
					+ (lo - 0xdc00);
				p += 2;
				len--;
			}
		}
		out_len += put_utf8(x, out + out_len);
		/* the zero character just written terminates the result */
		if (x == 0)
			return out;
	}
	if (out)
		out[out_len] = '\0';
	return out;
}
104
/*
 * The 16 bytes which identify the content description object. This object
 * stores title, artist and comment.
 */
static const char content_description_header[] = {
	0x33, 0x26, 0xb2, 0x75, 0x8E, 0x66, 0xCF, 0x11,
	0xa6, 0xd9, 0x00, 0xaa, 0x00, 0x62, 0xce, 0x6c
};
109
/*
 * The 16 bytes which identify the extended content description object. This
 * object stores album and year.
 */
static const char extended_content_header[] = {
	0x40, 0xA4, 0xD0, 0xD2, 0x07, 0xE3, 0xD2, 0x11,
	0x97, 0xF0, 0x00, 0xA0, 0xC9, 0x5E, 0xA8, 0x50
};
114
/*
 * The descriptor name of the year tag: each character is stored as two
 * bytes, low byte first. The terminating null character is not included.
 */
static const char year_tag_header[] = { /* WM/Year */
	0x57, 0x00, 0x4d, 0x00, 0x2f, 0x00, 0x59, 0x00,
	0x65, 0x00, 0x61, 0x00, 0x72, 0x00
};
119
/*
 * The descriptor name of the album tag: each character is stored as two
 * bytes, low byte first. The terminating null character is not included.
 */
static const char album_tag_header[] = { /* WM/AlbumTitle */
	0x57, 0x00, 0x4d, 0x00, 0x2f, 0x00, 0x41, 0x00,
	0x6c, 0x00, 0x62, 0x00, 0x75, 0x00, 0x6d, 0x00,
	0x54, 0x00, 0x69, 0x00, 0x74, 0x00, 0x6c, 0x00,
	0x65, 0x00
};
126
127 static void read_asf_tags(const char *buf, int buf_size, struct taginfo *ti)
128 {
129 const char *p, *end = buf + buf_size, *q;
130 uint16_t len1, len2, len3, len4;
131
132 p = search_pattern(content_description_header,
133 sizeof(content_description_header), buf, buf_size);
134 if (!p || p + 34 >= end) {
135 PARA_NOTICE_LOG("content description header not found\n");
136 goto next;
137 }
138 p += 24;
139 len1 = read_u16(p);
140 p += 2;
141 len2 = read_u16(p);
142 p += 2;
143 len3 = read_u16(p);
144 p += 2;
145 len4 = read_u16(p);
146 p += 2;
147 /* ignore length of the rating information */
148 p += 2;
149 if (p + len1 >= end)
150 goto next;
151 ti->title = get_str16(p, len1);
152 p += len1;
153 if (p + len2 >= end)
154 goto next;
155 ti->artist = get_str16(p, len2);
156 p += len2 + len3;
157 if (p + len4 >= end)
158 goto next;
159 ti->comment = get_str16(p, len4);
160 next:
161 p = search_pattern(extended_content_header, sizeof(extended_content_header),
162 buf, buf_size);
163 if (!p) {
164 PARA_NOTICE_LOG("extended content header not found\n");
165 return;
166 }
167 q = search_pattern(year_tag_header, sizeof(year_tag_header),
168 p, end - p);
169 if (q) {
170 const char *r = q + sizeof(year_tag_header) + 6;
171 if (r < end)
172 ti->year = get_str16(r, end - r);
173 }
174 q = search_pattern(album_tag_header, sizeof(album_tag_header),
175 p, end - p);
176 if (q) {
177 const char *r = q + sizeof(album_tag_header) + 6;
178 if (r < end)
179 ti->album = get_str16(r, end - r);
180 }
181 }
182
/*
 * Compute the duration of one chunk.
 *
 * The duration in microseconds is frames_per_chunk * 2048 * 1000000 /
 * frequency, computed with 64-bit arithmetic.
 *
 * \param frames_per_chunk Number of frames which make up one chunk.
 * \param frequency The divisor of the computation; must not be zero.
 * \param result The chunk time is stored here.
 */
static void set_chunk_tv(int frames_per_chunk, int frequency,
		struct timeval *result)
{
	const uint64_t usec_per_sec = 1000 * 1000;
	uint64_t chunk_usec;

	chunk_usec = (uint64_t)frames_per_chunk * 2048 * 1000 * 1000
		/ frequency;
	result->tv_usec = chunk_usec % usec_per_sec;
	result->tv_sec = chunk_usec / usec_per_sec;
	PARA_INFO_LOG("chunk time: %lums\n", tv2ms(result));
}
193
194 /* Must be called on a frame boundary. */
195 static int wma_make_chunk_table(char *buf, size_t buf_size, uint32_t packet_size,
196 struct afh_info *afhi)
197 {
198 const uint8_t *f, *start = (uint8_t *)buf;
199 int j, frames_per_chunk;
200 size_t ct_size = 250;
201 int ret, count = 0, num_frames, num_superframes;
202
203 afhi->chunk_table = para_malloc(ct_size * sizeof(uint32_t));
204 afhi->chunk_table[0] = 0;
205 afhi->chunk_table[1] = afhi->header_len;
206
207 num_frames = count_frames(buf, buf_size, packet_size,
208 &num_superframes);
209 ret = -E_NO_WMA;
210 if (num_frames == 0 || num_superframes == 0)
211 goto fail;
212 afhi->seconds_total = num_frames * 2048 /* FIXME */
213 / afhi->frequency;
214 frames_per_chunk = num_frames / num_superframes / 2;
215 PARA_INFO_LOG("%d frames per chunk\n", frames_per_chunk);
216 j = 1;
217 FOR_EACH_FRAME(f, start, buf_size, packet_size) {
218 count += f[WMA_FRAME_SKIP] & 0x0f;
219 while (count > j * frames_per_chunk) {
220 j++;
221 if (j >= ct_size) {
222 ct_size *= 2;
223 afhi->chunk_table = para_realloc(
224 afhi->chunk_table,
225 ct_size * sizeof(uint32_t));
226 }
227 afhi->chunk_table[j] = f - start + afhi->header_len
228 + packet_size;
229 }
230 }
231 afhi->chunks_total = j;
232 set_max_chunk_size(afhi);
233 set_chunk_tv(frames_per_chunk, afhi->frequency, &afhi->chunk_tv);
234 return 1;
235 fail:
236 free(afhi->chunk_table);
237 return ret;
238 }
239
240 static int wma_get_file_info(char *map, size_t numbytes, __a_unused int fd,
241 struct afh_info *afhi)
242 {
243 int ret;
244 struct asf_header_info ahi;
245
246 ret = read_asf_header(map, numbytes, &ahi);
247 if (ret < 0)
248 return ret;
249 if (ret == 0)
250 return -E_NO_WMA;
251 afhi->bitrate = ahi.bit_rate / 1000;
252 if (ahi.sample_rate == 0)
253 return -E_NO_WMA;
254 afhi->frequency = ahi.sample_rate;
255 afhi->channels = ahi.channels;
256 afhi->header_len = ahi.header_len;
257
258 afhi->techinfo = make_message("%s%s%s%s%s",
259 ahi.use_exp_vlc? "exp vlc" : "",
260 (ahi.use_bit_reservoir && ahi.use_exp_vlc)? ", " : "",
261 ahi.use_bit_reservoir? "bit reservoir" : "",
262 (ahi.use_variable_block_len &&
263 (ahi.use_exp_vlc || ahi.use_bit_reservoir)? ", " : ""),
264 ahi.use_variable_block_len? "vbl" : ""
265 );
266 wma_make_chunk_table(map + ahi.header_len, numbytes - ahi.header_len,
267 ahi.packet_size, afhi);
268 read_asf_tags(map, ahi.header_len, &afhi->tags);
269 return 0;
270 }
271
/* Location and size of one object of the header. */
struct asf_object {
	/* Start of the object. The first 16 bytes identify its type. */
	char *ptr;
	/* Size of the whole object in bytes, as stored at offset 16. */
	uint64_t size;
};
276
/*
 * Indices into the object array of the top-level header object. A negative
 * value means that the file does not contain the corresponding object.
 */
struct tag_object_nums {
	/* index of the content description object */
	int content_descr_obj_num;
	/* index of the extended content description object */
	int extended_content_descr_obj_num;
};
281
/* The parsed contents of the top-level header object. */
struct afs_top_level_header_object {
	/* size of the header object in bytes, as stored in the file */
	uint64_t size;
	/* number of elements of the objects array */
	uint32_t num_objects;
	/* copied unmodified to the output file when tags are rewritten */
	uint8_t reserved1, reserved2;
	/* the objects contained in the header */
	struct asf_object *objects;
};
288
/*
 * Evaluates to true if the memory at _p starts with the bytes of _h. Since
 * the number of bytes to compare is sizeof(_h), _h must be an array rather
 * than a pointer.
 */
#define CHECK_HEADER(_p, _h) (memcmp((_p), (_h), sizeof((_h))) == 0)
290
291 static int read_asf_objects(const char *src, size_t size, uint32_t num_objects,
292 struct asf_object *objs, struct tag_object_nums *ton)
293 {
294 int i;
295 const char *p;
296
297 for (i = 0, p = src; i < num_objects; p += objs[i++].size) {
298 if (p + 24 > src + size)
299 return -E_NO_WMA;
300 objs[i].ptr = (char *)p;
301 objs[i].size = read_u64(p + 16);
302 if (p + objs[i].size > src + size)
303 return -E_NO_WMA;
304
305 if (CHECK_HEADER(p, content_description_header))
306 ton->content_descr_obj_num = i;
307 else if (CHECK_HEADER(p, extended_content_header))
308 ton->extended_content_descr_obj_num = i;
309 }
310 return 1;
311 }
312
/* The 16 bytes at the start of the file which identify the header object. */
static const char top_level_header_object_guid[] = {
	0x30, 0x26, 0xb2, 0x75, 0x8e, 0x66, 0xcf, 0x11,
	0xa6, 0xd9, 0x00, 0xaa, 0x00, 0x62, 0xce, 0x6c
};
317
318 static int convert_utf8_to_utf16(char *src, char **dst)
319 {
320 /*
321 * Without specifying LE (little endian), iconv includes a byte order
322 * mark (e.g. 0xFFFE) at the beginning.
323 */
324 iconv_t cd = iconv_open("UTF-16LE", "UTF-8");
325 size_t sz, inbytes, outbytes, inbytesleft, outbytesleft;
326 char *inbuf, *outbuf;
327 int ret;
328
329 if (!src || !*src) {
330 *dst = para_calloc(2);
331 ret = 0;
332 goto out;
333 }
334 if (cd == (iconv_t) -1)
335 return -ERRNO_TO_PARA_ERROR(errno);
336 inbuf = src;
337 /* even though src is in UTF-8, strlen() should DTRT */
338 inbytes = inbytesleft = strlen(src);
339 outbytes = outbytesleft = 4 * inbytes + 2; /* hope that's enough */
340 *dst = outbuf = para_malloc(outbytes);
341 sz = iconv(cd, ICONV_CAST &inbuf, &inbytesleft, &outbuf, &outbytesleft);
342 if (sz == (size_t)-1) {
343 ret = -ERRNO_TO_PARA_ERROR(errno);
344 goto out;
345 }
346 assert(outbytes >= outbytesleft);
347 assert(outbytes - outbytesleft < INT_MAX - 2);
348 ret = outbytes - outbytesleft;
349 outbuf = para_realloc(*dst, ret + 2);
350 outbuf[ret] = outbuf[ret + 1] = '\0';
351 ret += 2;
352 *dst = outbuf;
353 PARA_INFO_LOG("converted %s to %d UTF-16 bytes\n", src, ret);
354 out:
355 if (ret < 0)
356 free(*dst);
357 if (iconv_close(cd) < 0)
358 PARA_WARNING_LOG("iconv_close: %s\n", strerror(errno));
359 return ret;
360 }
361
362 /* The content description object contains artist, title, comment. */
363 static int make_cdo(struct taginfo *tags, const struct asf_object *cdo,
364 struct asf_object *result)
365 {
366 const char *cr, *rating; /* orig data */
367 uint16_t orig_title_bytes, orig_artist_bytes, orig_cr_bytes,
368 orig_comment_bytes, orig_rating_bytes;
369 /* pointers to new UTF-16 tags */
370 char *artist = NULL, *title = NULL, *comment = NULL;
371 /* number of bytes in UTF-16 for the new tags */
372 int artist_bytes, title_bytes, comment_bytes, ret;
373 char *p, null[2] = "\0\0";
374
375 result->ptr = NULL;
376 result->size = 0;
377 ret = convert_utf8_to_utf16(tags->artist, &artist);
378 if (ret < 0)
379 return ret;
380 artist_bytes = ret;
381 ret = convert_utf8_to_utf16(tags->title, &title);
382 if (ret < 0)
383 goto out;
384 title_bytes = ret;
385 ret = convert_utf8_to_utf16(tags->comment, &comment);
386 if (ret < 0)
387 goto out;
388 comment_bytes = ret;
389
390 if (cdo) {
391 /*
392 * Sizes of the five fields (stored as 16-bit numbers) are
393 * located after the header (16 bytes) and the cdo size (8
394 * bytes).
395 */
396 orig_title_bytes = read_u16(cdo->ptr + 24);
397 orig_artist_bytes = read_u16(cdo->ptr + 26);
398 orig_cr_bytes = read_u16(cdo->ptr + 28);
399 orig_comment_bytes = read_u16(cdo->ptr + 30);
400 orig_rating_bytes = read_u16(cdo->ptr + 32);
401 cr = cdo->ptr + 34 + orig_title_bytes + orig_artist_bytes;
402 rating = cr + orig_cr_bytes + orig_comment_bytes;
403 } else {
404 orig_title_bytes = 2;
405 orig_artist_bytes = 2;
406 orig_cr_bytes = 2;
407 orig_comment_bytes = 2;
408 orig_rating_bytes = 2;
409 cr = null;
410 rating = null;
411 }
412
413 /* compute size of result cdo */
414 result->size = 16 + 8 + 5 * 2 + title_bytes + artist_bytes
415 + orig_cr_bytes + comment_bytes + orig_rating_bytes;
416 PARA_DEBUG_LOG("cdo is %zu bytes\n", (size_t)result->size);
417 p = result->ptr = para_malloc(result->size);
418 memcpy(p, content_description_header, 16);
419 p += 16;
420 write_u64(p, result->size);
421 p += 8;
422 write_u16(p, title_bytes);
423 p += 2;
424 write_u16(p, artist_bytes);
425 p += 2;
426 write_u16(p, orig_cr_bytes);
427 p += 2;
428 write_u16(p, comment_bytes);
429 p += 2;
430 write_u16(p, orig_rating_bytes);
431 p += 2;
432 memcpy(p, title, title_bytes);
433 p += title_bytes;
434 memcpy(p, artist, artist_bytes);
435 p += artist_bytes;
436 memcpy(p, cr, orig_cr_bytes);
437 p += orig_cr_bytes;
438 memcpy(p, comment, comment_bytes);
439 p += comment_bytes;
440 memcpy(p, rating, orig_rating_bytes);
441 p += orig_rating_bytes;
442 assert(p - result->ptr == result->size);
443 ret = 1;
444 out:
445 free(artist);
446 free(title);
447 free(comment);
448 return ret;
449 }
450
451 /* The extended content description object contains album and year. */
452 static int make_ecdo(struct taginfo *tags, struct asf_object *result)
453 {
454 int ret;
455 char *p, *album = NULL, *year = NULL, null[2] = "\0\0";
456 int album_bytes, year_bytes;
457
458 result->ptr = NULL;
459 result->size = 0;
460 ret = convert_utf8_to_utf16(tags->album, &album);
461 if (ret < 0)
462 return ret;
463 album_bytes = ret;
464 ret = convert_utf8_to_utf16(tags->year, &year);
465 if (ret < 0)
466 goto out;
467 year_bytes = ret;
468 result->size = 16 + 8 + 2; /* GUID, size, count */
469 /* name_length + name + null + data type + val length + val */
470 result->size += 2 + sizeof(album_tag_header) + 2 + 2 + 2 + album_bytes;
471 result->size += 2 + sizeof(year_tag_header) + 2 + 2 + 2 + year_bytes;
472
473 p = result->ptr = para_malloc(result->size);
474 memcpy(p, extended_content_header, 16);
475 p += 16;
476 write_u64(p, result->size);
477 p += 8;
478 write_u16(p, 2); /* count */
479 p += 2;
480
481 /* album */
482 write_u16(p, sizeof(album_tag_header) + 2);
483 p += 2;
484 memcpy(p, album_tag_header, sizeof(album_tag_header));
485 p += sizeof(album_tag_header);
486 memcpy(p, null, 2);
487 p += 2;
488 write_u16(p, 0); /* data type (UTF-16) */
489 p += 2;
490 write_u16(p, album_bytes);
491 p += 2;
492 memcpy(p, album, album_bytes);
493 p += album_bytes;
494
495 /* year */
496 write_u16(p, sizeof(year_tag_header));
497 p += 2;
498 memcpy(p, year_tag_header, sizeof(year_tag_header));
499 p += sizeof(year_tag_header);
500 memcpy(p, null, 2);
501 p += 2;
502 write_u16(p, 0); /* data type (UTF-16) */
503 p += 2;
504 write_u16(p, year_bytes);
505 p += 2;
506 memcpy(p, year, year_bytes);
507 p += year_bytes;
508 assert(p - result->ptr == result->size);
509 ret = 1;
510 out:
511 free(album);
512 free(year);
513 return ret;
514 }
515
516 static int write_output_file(int fd, const char *map, size_t mapsize,
517 struct afs_top_level_header_object *top, struct tag_object_nums *ton,
518 struct asf_object *cdo, struct asf_object *ecdo)
519 {
520 int i, ret;
521 uint64_t sz; /* of the new header object */
522 uint32_t num_objects;
523 char tmp[8];
524
525 sz = 16 + 8 + 4 + 1 + 1; /* top-level header object */
526 for (i = 0; i < top->num_objects; i++) {
527 if (i == ton->content_descr_obj_num)
528 continue;
529 if (i == ton->extended_content_descr_obj_num)
530 continue;
531 sz += top->objects[i].size;
532 }
533 sz += cdo->size;
534 sz += ecdo->size;
535 num_objects = top->num_objects;
536 if (ton->content_descr_obj_num < 0)
537 num_objects++;
538 if (ton->extended_content_descr_obj_num < 0)
539 num_objects++;
540 ret = xwrite(fd, top_level_header_object_guid, 16);
541 if (ret < 0)
542 goto out;
543 write_u64(tmp, sz);
544 ret = xwrite(fd, tmp, 8);
545 if (ret < 0)
546 goto out;
547 write_u32(tmp, num_objects);
548 ret = xwrite(fd, tmp, 4);
549 if (ret < 0)
550 goto out;
551 write_u8(tmp, top->reserved1);
552 ret = xwrite(fd, tmp, 1);
553 if (ret < 0)
554 goto out;
555 write_u8(tmp, top->reserved2);
556 ret = xwrite(fd, tmp, 1);
557 if (ret < 0)
558 goto out;
559 /*
560 * Write cto and ecto as objects 0 and 1 if they did not exist in the
561 * original file.
562 */
563 if (ton->content_descr_obj_num < 0) {
564 ret = xwrite(fd, cdo->ptr, cdo->size);
565 if (ret < 0)
566 goto out;
567 }
568 if (ton->extended_content_descr_obj_num < 0) {
569 ret = xwrite(fd, ecdo->ptr, ecdo->size);
570 if (ret < 0)
571 goto out;
572 }
573
574 for (i = 0; i < top->num_objects; i++) {
575 char *buf = top->objects[i].ptr;
576 sz = top->objects[i].size;
577 if (i == ton->content_descr_obj_num) {
578 buf = cdo->ptr;
579 sz = cdo->size;
580 } else if (i == ton->extended_content_descr_obj_num) {
581 buf = ecdo->ptr;
582 sz = ecdo->size;
583 }
584 ret = xwrite(fd, buf, sz);
585 if (ret < 0)
586 goto out;
587 }
588 ret = xwrite(fd, map + top->size, mapsize - top->size);
589 out:
590 return ret;
591 }
592
593 static int wma_rewrite_tags(const char *map, size_t mapsize,
594 struct taginfo *tags, int fd,
595 __a_unused const char *filename)
596 {
597 struct afs_top_level_header_object top;
598 struct tag_object_nums ton = {-1, -1};
599 const char *p = map;
600 /* (extended) content description object */
601 struct asf_object cdo = {.ptr = NULL}, ecdo = {.ptr = NULL};
602 int ret;
603
604 /* guid + size + num_objects + 2 * reserved */
605 if (mapsize < 16 + 8 + 4 + 1 + 1)
606 return -E_NO_WMA;
607 if (memcmp(map, top_level_header_object_guid, 16))
608 return -E_NO_WMA;
609 p += 16;
610 top.size = read_u64(p);
611 PARA_INFO_LOG("header_size: %lu\n", (long unsigned)top.size);
612 if (top.size >= mapsize)
613 return -E_NO_WMA;
614 p += 8;
615 top.num_objects = read_u32(p);
616 PARA_NOTICE_LOG("%u header objects\n", top.num_objects);
617 if (top.num_objects > top.size / 24)
618 return -E_NO_WMA;
619 p += 4;
620 top.reserved1 = read_u8(p);
621 p++;
622 top.reserved2 = read_u8(p);
623 if (top.reserved2 != 2)
624 return -E_NO_WMA;
625 p++; /* objects start at p */
626 top.objects = para_malloc(top.num_objects * sizeof(struct asf_object));
627 ret = read_asf_objects(p, top.size - (p - map), top.num_objects,
628 top.objects, &ton);
629 if (ret < 0)
630 goto out;
631 ret = make_cdo(tags, ton.content_descr_obj_num >= 0?
632 top.objects + ton.content_descr_obj_num : NULL, &cdo);
633 if (ret < 0)
634 goto out;
635 ret = make_ecdo(tags, &ecdo);
636 if (ret < 0)
637 goto out;
638 ret = write_output_file(fd, map, mapsize, &top, &ton, &cdo,
639 &ecdo);
640 out:
641 free(cdo.ptr);
642 free(ecdo.ptr);
643 free(top.objects);
644 return ret;
645 }
646
/* NULL-terminated list of the suffixes registered by wma_afh_init(). */
static const char * const wma_suffixes[] = {"wma", NULL};
648
649 /**
650 * The init function of the wma audio format handler.
651 *
652 * \param afh Pointer to the struct to initialize.
653 */
654 void wma_afh_init(struct audio_format_handler *afh)
655 {
656 afh->get_file_info = wma_get_file_info;
657 afh->suffixes = wma_suffixes;
658 afh->rewrite_tags = wma_rewrite_tags;
659 }