Merge branch 'refs/heads/t/opus'
[paraslash.git] / wma_afh.c
1 /*
2 * Copyright (C) 2009 Andre Noll <maan@tuebingen.mpg.de>
3 *
4 * Licensed under the GPL v2. For licencing details see COPYING.
5 */
6
7 /** \file wma_afh.c The audio format handler for WMA files. */
8
9 #include <sys/types.h>
10 #include <regex.h>
11 #include <iconv.h>
12
13 #include "para.h"
14 #include "error.h"
15 #include "afh.h"
16 #include "portable_io.h"
17 #include "string.h"
18 #include "wma.h"
19 #include "fd.h"
20
/*
 * Iterate over the fixed-size packets of a buffer.
 *
 * _f is pointed at the start of each packet of _ps bytes within the _size
 * bytes that begin at _buf. The bound is strict: a packet whose end coincides
 * with the end of the buffer is not visited.
 */
#define FOR_EACH_FRAME(_f, _buf, _size, _ps) \
	for ((_f) = (_buf); \
		(_f) + (_ps) < (_buf) + (_size); \
		(_f) += (_ps))
24
25 /*
26 * Must be called on a frame boundary, e.g. start + header_len.
27 * \return Frame count, superframe count via *num_superframes.
28 */
29 static int count_frames(const char *buf, int buf_size, uint32_t packet_size,
30 int *num_superframes)
31 {
32 int fc = 0, sfc = 0; /* frame count, superframe count */
33 const uint8_t *p;
34
35
36 FOR_EACH_FRAME(p, (uint8_t *)buf, buf_size, packet_size) {
37 fc += p[WMA_FRAME_SKIP] & 0x0f;
38 sfc++;
39 }
40 PARA_INFO_LOG("%d frames, %d superframes\n", fc, sfc);
41 *num_superframes = sfc;
42 return fc;
43 }
44
45 /*
46 * put_utf8() and get_str16() below are based on macros in libavutil/common.h
47 * of the mplayer source code, copyright (c) 2006 Michael Niedermayer
48 * <michaelni@gmx.at>.
49 */
50
/*
 * Convert a 32-bit Unicode character to its UTF-8 encoded form.
 *
 * Writes up to 4 bytes for values in the valid UTF-8 range and up to 7 bytes
 * in the general case, depending on the length of the converted Unicode
 * character. No terminating zero byte is appended.
 *
 * \param val The character to convert.
 * \param result Where the converted UTF-8 bytes are written.
 *
 * \return The number of bytes written to result.
 */
static int put_utf8(uint32_t val, char *result)
{
	int nbytes, bits, idx = 0;

	if (val < 0x80) { /* single byte, stored verbatim */
		result[0] = val;
		return 1;
	}
	nbytes = DIV_ROUND_UP(wma_log2(val), 5);
	bits = (nbytes - 1) * 6;
	/* lead byte: nbytes one-bits followed by the topmost payload bits */
	result[idx++] = (256 - (256 >> nbytes)) | (val >> bits);
	/* continuation bytes carry six payload bits each */
	for (bits -= 6; bits >= 0; bits -= 6)
		result[idx++] = 0x80 | ((val >> bits) & 0x3f);
	return idx;
}
79
/*
 * Convert a buffer of 16-bit characters to a zero-terminated UTF-8 string.
 *
 * At most len / 2 values are fetched with read_u16() and each one is passed
 * to put_utf8() on its own. Conversion stops early at the first zero value.
 *
 * \param in The buffer to convert.
 * \param len Size of the buffer in bytes.
 *
 * \return A buffer obtained from para_realloc(), or NULL if len / 2 is zero.
 */
static char *get_str16(const char *in, int len)
{
	char *utf8 = NULL;
	int capacity = 0, used = 0, units;

	for (units = len / 2; units != 0; units--, in += 2) {
		uint32_t code_unit;

		/* room for the longest put_utf8() output plus the terminator */
		if (used + 7 + 1 >= capacity) {
			capacity = 2 * capacity + 50;
			utf8 = para_realloc(utf8, capacity);
		}
		code_unit = read_u16(in);
		used += put_utf8(code_unit, utf8 + used);
		if (code_unit == 0) /* put_utf8() just stored the terminator */
			return utf8;
	}
	if (utf8)
		utf8[used] = '\0';
	return utf8;
}
103
/* GUID of the content description object: 75B22633-668E-11CF-A6D9-00AA0062CE6C */
static const char content_description_header[] = {
	0x33, 0x26, 0xb2, 0x75, 0x8e, 0x66, 0xcf, 0x11,
	0xa6, 0xd9, 0x00, 0xaa, 0x00, 0x62, 0xce, 0x6c
};

/* GUID of the extended content description object: D2D0A440-E307-11D2-97F0-00A0C95EA850 */
static const char extended_content_header[] = {
	0x40, 0xa4, 0xd0, 0xd2, 0x07, 0xe3, 0xd2, 0x11,
	0x97, 0xf0, 0x00, 0xa0, 0xc9, 0x5e, 0xa8, 0x50
};

/* "WM/Year", two bytes per character (low byte first), no terminator */
static const char year_tag_header[] = {
	'W', 0, 'M', 0, '/', 0, 'Y', 0,
	'e', 0, 'a', 0, 'r', 0
};

/* "WM/AlbumTitle", two bytes per character (low byte first), no terminator */
static const char album_tag_header[] = {
	'W', 0, 'M', 0, '/', 0, 'A', 0,
	'l', 0, 'b', 0, 'u', 0, 'm', 0,
	'T', 0, 'i', 0, 't', 0, 'l', 0,
	'e', 0
};
125
126 static void read_asf_tags(const char *buf, int buf_size, struct taginfo *ti)
127 {
128 const char *p, *end = buf + buf_size, *q;
129 uint16_t len1, len2, len3, len4;
130
131 p = search_pattern(content_description_header,
132 sizeof(content_description_header), buf, buf_size);
133 if (!p || p + 34 >= end) {
134 PARA_NOTICE_LOG("content description header not found\n");
135 goto next;
136 }
137 p += 24;
138 len1 = read_u16(p);
139 p += 2;
140 len2 = read_u16(p);
141 p += 2;
142 len3 = read_u16(p);
143 p += 2;
144 len4 = read_u16(p);
145 p += 2;
146 /* ignore length of the rating information */
147 p += 2;
148 if (p + len1 >= end)
149 goto next;
150 ti->title = get_str16(p, len1);
151 p += len1;
152 if (p + len2 >= end)
153 goto next;
154 ti->artist = get_str16(p, len2);
155 p += len2 + len3;
156 if (p + len4 >= end)
157 goto next;
158 ti->comment = get_str16(p, len4);
159 next:
160 p = search_pattern(extended_content_header, sizeof(extended_content_header),
161 buf, buf_size);
162 if (!p) {
163 PARA_NOTICE_LOG("extended content header not found\n");
164 return;
165 }
166 q = search_pattern(year_tag_header, sizeof(year_tag_header),
167 p, end - p);
168 if (q) {
169 const char *r = q + sizeof(year_tag_header) + 6;
170 if (r < end)
171 ti->year = get_str16(r, end - r);
172 }
173 q = search_pattern(album_tag_header, sizeof(album_tag_header),
174 p, end - p);
175 if (q) {
176 const char *r = q + sizeof(album_tag_header) + 6;
177 if (r < end)
178 ti->album = get_str16(r, end - r);
179 }
180 }
181
/*
 * Compute the duration of one chunk.
 *
 * The duration in microseconds is frames_per_chunk * 2048 * 1000000 /
 * frequency, computed in 64-bit arithmetic, and is stored in *result split
 * into seconds and microseconds.
 */
static void set_chunk_tv(int frames_per_chunk, int frequency,
		struct timeval *result)
{
	const uint64_t usec_per_sec = 1000 * 1000;
	uint64_t chunk_usec;

	chunk_usec = (uint64_t)frames_per_chunk * 2048 * usec_per_sec;
	chunk_usec /= frequency;
	result->tv_usec = chunk_usec % usec_per_sec;
	result->tv_sec = chunk_usec / usec_per_sec;
	PARA_INFO_LOG("chunk time: %lums\n", tv2ms(result));
}
192
193 /* Must be called on a frame boundary. */
194 static int wma_make_chunk_table(char *buf, size_t buf_size, uint32_t packet_size,
195 struct afh_info *afhi)
196 {
197 const uint8_t *f, *start = (uint8_t *)buf;
198 int j, frames_per_chunk;
199 size_t ct_size = 250;
200 int ret, count = 0, num_frames, num_superframes;
201
202 afhi->chunk_table = para_malloc(ct_size * sizeof(uint32_t));
203 afhi->chunk_table[0] = 0;
204 afhi->chunk_table[1] = afhi->header_len;
205
206 num_frames = count_frames(buf, buf_size, packet_size,
207 &num_superframes);
208 ret = -E_NO_WMA;
209 if (num_frames == 0 || num_superframes == 0)
210 goto fail;
211 afhi->seconds_total = num_frames * 2048 /* FIXME */
212 / afhi->frequency;
213 frames_per_chunk = num_frames / num_superframes / 2;
214 PARA_INFO_LOG("%d frames per chunk\n", frames_per_chunk);
215 j = 1;
216 FOR_EACH_FRAME(f, start, buf_size, packet_size) {
217 count += f[WMA_FRAME_SKIP] & 0x0f;
218 while (count > j * frames_per_chunk) {
219 j++;
220 if (j >= ct_size) {
221 ct_size *= 2;
222 afhi->chunk_table = para_realloc(
223 afhi->chunk_table,
224 ct_size * sizeof(uint32_t));
225 }
226 afhi->chunk_table[j] = f - start + afhi->header_len
227 + packet_size;
228 }
229 }
230 afhi->chunks_total = j;
231 set_max_chunk_size(afhi);
232 set_chunk_tv(frames_per_chunk, afhi->frequency, &afhi->chunk_tv);
233 return 1;
234 fail:
235 free(afhi->chunk_table);
236 return ret;
237 }
238
239 static int wma_get_file_info(char *map, size_t numbytes, __a_unused int fd,
240 struct afh_info *afhi)
241 {
242 int ret;
243 struct asf_header_info ahi;
244
245 ret = read_asf_header(map, numbytes, &ahi);
246 if (ret < 0)
247 return ret;
248 if (ret == 0)
249 return -E_NO_WMA;
250 afhi->bitrate = ahi.bit_rate / 1000;
251 if (ahi.sample_rate == 0)
252 return -E_NO_WMA;
253 afhi->frequency = ahi.sample_rate;
254 afhi->channels = ahi.channels;
255 afhi->header_len = ahi.header_len;
256
257 afhi->techinfo = make_message("%s%s%s%s%s",
258 ahi.use_exp_vlc? "exp vlc" : "",
259 (ahi.use_bit_reservoir && ahi.use_exp_vlc)? ", " : "",
260 ahi.use_bit_reservoir? "bit reservoir" : "",
261 (ahi.use_variable_block_len &&
262 (ahi.use_exp_vlc || ahi.use_bit_reservoir)? ", " : ""),
263 ahi.use_variable_block_len? "vbl" : ""
264 );
265 wma_make_chunk_table(map + ahi.header_len, numbytes - ahi.header_len,
266 ahi.packet_size, afhi);
267 read_asf_tags(map, ahi.header_len, &afhi->tags);
268 return 0;
269 }
270
/* Location and extent of one object of an ASF header. */
struct asf_object {
	/* Start of the object, i.e. the first byte of its GUID. */
	char *ptr;
	/* Number of bytes of the object. */
	uint64_t size;
};

/*
 * Indices into the object array of the two objects which contain tags.
 * Negative values are used for objects which are not present.
 */
struct tag_object_nums {
	/* Index of the content description object. */
	int content_descr_obj_num;
	/* Index of the extended content description object. */
	int extended_content_descr_obj_num;
};

/* The fields of the top-level header object of an ASF file. */
struct afs_top_level_header_object {
	/* The 64-bit size field that follows the GUID. */
	uint64_t size;
	/* Number of objects contained in the header. */
	uint32_t num_objects;
	/* The two reserved bytes that follow the object count. */
	uint8_t reserved1;
	uint8_t reserved2;
	/* Array with num_objects elements. */
	struct asf_object *objects;
};
287
288 #define CHECK_HEADER(_p, _h) (memcmp((_p), (_h), sizeof((_h))) == 0)
289
290 static int read_asf_objects(const char *src, size_t size, uint32_t num_objects,
291 struct asf_object *objs, struct tag_object_nums *ton)
292 {
293 int i;
294 const char *p;
295
296 for (i = 0, p = src; i < num_objects; p += objs[i++].size) {
297 if (p + 24 > src + size)
298 return -E_NO_WMA;
299 objs[i].ptr = (char *)p;
300 objs[i].size = read_u64(p + 16);
301 if (p + objs[i].size > src + size)
302 return -E_NO_WMA;
303
304 if (CHECK_HEADER(p, content_description_header))
305 ton->content_descr_obj_num = i;
306 else if (CHECK_HEADER(p, extended_content_header))
307 ton->extended_content_descr_obj_num = i;
308 }
309 return 1;
310 }
311
/* GUID of the top-level header object: 75B22630-668E-11CF-A6D9-00AA0062CE6C */
static const char top_level_header_object_guid[] = {
	0x30, 0x26, 0xb2, 0x75,
	0x8e, 0x66,
	0xcf, 0x11,
	0xa6, 0xd9,
	0x00, 0xaa, 0x00, 0x62, 0xce, 0x6c
};
316
317 static int convert_utf8_to_utf16(char *src, char **dst)
318 {
319 iconv_t cd;
320 size_t sz, inbytes, outbytes, inbytesleft, outbytesleft;
321 char *inbuf, *outbuf;
322 int ret;
323
324 if (!src || !*src) {
325 *dst = para_calloc(2);
326 return 0;
327 }
328 /*
329 * Without specifying LE (little endian), iconv includes a byte order
330 * mark (e.g. 0xFFFE) at the beginning.
331 */
332 cd = iconv_open("UTF-16LE", "UTF-8");
333 if (cd == (iconv_t)-1) {
334 *dst = NULL;
335 return -ERRNO_TO_PARA_ERROR(errno);
336 }
337 inbuf = src;
338 /* even though src is in UTF-8, strlen() should DTRT */
339 inbytes = inbytesleft = strlen(src);
340 outbytes = outbytesleft = 4 * inbytes + 2; /* hope that's enough */
341 *dst = outbuf = para_malloc(outbytes);
342 sz = iconv(cd, ICONV_CAST &inbuf, &inbytesleft, &outbuf, &outbytesleft);
343 if (sz == (size_t)-1) {
344 ret = -ERRNO_TO_PARA_ERROR(errno);
345 free(*dst);
346 *dst = NULL;
347 goto out;
348 }
349 assert(outbytes >= outbytesleft);
350 assert(outbytes - outbytesleft < INT_MAX - 2);
351 ret = outbytes - outbytesleft;
352 outbuf = para_realloc(*dst, ret + 2);
353 outbuf[ret] = outbuf[ret + 1] = '\0';
354 ret += 2;
355 *dst = outbuf;
356 PARA_INFO_LOG("converted %s to %d UTF-16 bytes\n", src, ret);
357 out:
358 if (iconv_close(cd) < 0)
359 PARA_WARNING_LOG("iconv_close: %s\n", strerror(errno));
360 return ret;
361 }
362
363 /* The content description object contains artist, title, comment. */
364 static int make_cdo(struct taginfo *tags, const struct asf_object *cdo,
365 struct asf_object *result)
366 {
367 const char *cr, *rating; /* orig data */
368 uint16_t orig_cr_bytes, orig_rating_bytes;
369 /* pointers to new UTF-16 tags */
370 char *artist = NULL, *title = NULL, *comment = NULL;
371 /* number of bytes in UTF-16 for the new tags */
372 int artist_bytes, title_bytes, comment_bytes, ret;
373 char *p, null[2] = "\0\0";
374
375 result->ptr = NULL;
376 result->size = 0;
377 ret = convert_utf8_to_utf16(tags->artist, &artist);
378 if (ret < 0)
379 return ret;
380 assert(artist);
381 artist_bytes = ret;
382 ret = convert_utf8_to_utf16(tags->title, &title);
383 if (ret < 0)
384 goto out;
385 assert(title);
386 title_bytes = ret;
387 ret = convert_utf8_to_utf16(tags->comment, &comment);
388 if (ret < 0)
389 goto out;
390 assert(comment);
391 comment_bytes = ret;
392
393 if (cdo) {
394 uint16_t orig_title_bytes, orig_artist_bytes, orig_comment_bytes;
395 /*
396 * Sizes of the five fields (stored as 16-bit numbers) are
397 * located after the header (16 bytes) and the cdo size (8
398 * bytes).
399 */
400 orig_title_bytes = read_u16(cdo->ptr + 24);
401 orig_artist_bytes = read_u16(cdo->ptr + 26);
402 orig_cr_bytes = read_u16(cdo->ptr + 28);
403 orig_comment_bytes = read_u16(cdo->ptr + 30);
404 orig_rating_bytes = read_u16(cdo->ptr + 32);
405 cr = cdo->ptr + 34 + orig_title_bytes + orig_artist_bytes;
406 rating = cr + orig_cr_bytes + orig_comment_bytes;
407 } else {
408 orig_cr_bytes = 2;
409 orig_rating_bytes = 2;
410 cr = null;
411 rating = null;
412 }
413
414 /* compute size of result cdo */
415 result->size = 16 + 8 + 5 * 2 + title_bytes + artist_bytes
416 + orig_cr_bytes + comment_bytes + orig_rating_bytes;
417 PARA_DEBUG_LOG("cdo is %zu bytes\n", (size_t)result->size);
418 p = result->ptr = para_malloc(result->size);
419 memcpy(p, content_description_header, 16);
420 p += 16;
421 write_u64(p, result->size);
422 p += 8;
423 write_u16(p, title_bytes);
424 p += 2;
425 write_u16(p, artist_bytes);
426 p += 2;
427 write_u16(p, orig_cr_bytes);
428 p += 2;
429 write_u16(p, comment_bytes);
430 p += 2;
431 write_u16(p, orig_rating_bytes);
432 p += 2;
433 memcpy(p, title, title_bytes);
434 p += title_bytes;
435 memcpy(p, artist, artist_bytes);
436 p += artist_bytes;
437 memcpy(p, cr, orig_cr_bytes);
438 p += orig_cr_bytes;
439 memcpy(p, comment, comment_bytes);
440 p += comment_bytes;
441 memcpy(p, rating, orig_rating_bytes);
442 p += orig_rating_bytes;
443 assert(p - result->ptr == result->size);
444 ret = 1;
445 out:
446 free(artist);
447 free(title);
448 free(comment);
449 return ret;
450 }
451
452 /* The extended content description object contains album and year. */
453 static int make_ecdo(struct taginfo *tags, struct asf_object *result)
454 {
455 int ret;
456 char *p, *album = NULL, *year = NULL, null[2] = "\0\0";
457 int album_bytes, year_bytes;
458
459 result->ptr = NULL;
460 result->size = 0;
461 ret = convert_utf8_to_utf16(tags->album, &album);
462 if (ret < 0)
463 return ret;
464 assert(album);
465 album_bytes = ret;
466 ret = convert_utf8_to_utf16(tags->year, &year);
467 if (ret < 0)
468 goto out;
469 assert(year);
470 year_bytes = ret;
471 result->size = 16 + 8 + 2; /* GUID, size, count */
472 /* name_length + name + null + data type + val length + val */
473 result->size += 2 + sizeof(album_tag_header) + 2 + 2 + 2 + album_bytes;
474 result->size += 2 + sizeof(year_tag_header) + 2 + 2 + 2 + year_bytes;
475
476 p = result->ptr = para_malloc(result->size);
477 memcpy(p, extended_content_header, 16);
478 p += 16;
479 write_u64(p, result->size);
480 p += 8;
481 write_u16(p, 2); /* count */
482 p += 2;
483
484 /* album */
485 write_u16(p, sizeof(album_tag_header) + 2);
486 p += 2;
487 memcpy(p, album_tag_header, sizeof(album_tag_header));
488 p += sizeof(album_tag_header);
489 memcpy(p, null, 2);
490 p += 2;
491 write_u16(p, 0); /* data type (UTF-16) */
492 p += 2;
493 write_u16(p, album_bytes);
494 p += 2;
495 memcpy(p, album, album_bytes);
496 p += album_bytes;
497
498 /* year */
499 write_u16(p, sizeof(year_tag_header));
500 p += 2;
501 memcpy(p, year_tag_header, sizeof(year_tag_header));
502 p += sizeof(year_tag_header);
503 memcpy(p, null, 2);
504 p += 2;
505 write_u16(p, 0); /* data type (UTF-16) */
506 p += 2;
507 write_u16(p, year_bytes);
508 p += 2;
509 memcpy(p, year, year_bytes);
510 p += year_bytes;
511 assert(p - result->ptr == result->size);
512 ret = 1;
513 out:
514 free(album);
515 free(year);
516 return ret;
517 }
518
519 static int write_output_file(int fd, const char *map, size_t mapsize,
520 struct afs_top_level_header_object *top, struct tag_object_nums *ton,
521 struct asf_object *cdo, struct asf_object *ecdo)
522 {
523 int i, ret;
524 uint64_t sz; /* of the new header object */
525 uint32_t num_objects;
526 char tmp[8];
527
528 sz = 16 + 8 + 4 + 1 + 1; /* top-level header object */
529 for (i = 0; i < top->num_objects; i++) {
530 if (i == ton->content_descr_obj_num)
531 continue;
532 if (i == ton->extended_content_descr_obj_num)
533 continue;
534 sz += top->objects[i].size;
535 }
536 sz += cdo->size;
537 sz += ecdo->size;
538 num_objects = top->num_objects;
539 if (ton->content_descr_obj_num < 0)
540 num_objects++;
541 if (ton->extended_content_descr_obj_num < 0)
542 num_objects++;
543 ret = xwrite(fd, top_level_header_object_guid, 16);
544 if (ret < 0)
545 goto out;
546 write_u64(tmp, sz);
547 ret = xwrite(fd, tmp, 8);
548 if (ret < 0)
549 goto out;
550 write_u32(tmp, num_objects);
551 ret = xwrite(fd, tmp, 4);
552 if (ret < 0)
553 goto out;
554 write_u8(tmp, top->reserved1);
555 ret = xwrite(fd, tmp, 1);
556 if (ret < 0)
557 goto out;
558 write_u8(tmp, top->reserved2);
559 ret = xwrite(fd, tmp, 1);
560 if (ret < 0)
561 goto out;
562 /*
563 * Write cto and ecto as objects 0 and 1 if they did not exist in the
564 * original file.
565 */
566 if (ton->content_descr_obj_num < 0) {
567 ret = xwrite(fd, cdo->ptr, cdo->size);
568 if (ret < 0)
569 goto out;
570 }
571 if (ton->extended_content_descr_obj_num < 0) {
572 ret = xwrite(fd, ecdo->ptr, ecdo->size);
573 if (ret < 0)
574 goto out;
575 }
576
577 for (i = 0; i < top->num_objects; i++) {
578 char *buf = top->objects[i].ptr;
579 sz = top->objects[i].size;
580 if (i == ton->content_descr_obj_num) {
581 buf = cdo->ptr;
582 sz = cdo->size;
583 } else if (i == ton->extended_content_descr_obj_num) {
584 buf = ecdo->ptr;
585 sz = ecdo->size;
586 }
587 ret = xwrite(fd, buf, sz);
588 if (ret < 0)
589 goto out;
590 }
591 ret = xwrite(fd, map + top->size, mapsize - top->size);
592 out:
593 return ret;
594 }
595
596 static int wma_rewrite_tags(const char *map, size_t mapsize,
597 struct taginfo *tags, int fd,
598 __a_unused const char *filename)
599 {
600 struct afs_top_level_header_object top;
601 struct tag_object_nums ton = {-1, -1};
602 const char *p = map;
603 /* (extended) content description object */
604 struct asf_object cdo = {.ptr = NULL}, ecdo = {.ptr = NULL};
605 int ret;
606
607 /* guid + size + num_objects + 2 * reserved */
608 if (mapsize < 16 + 8 + 4 + 1 + 1)
609 return -E_NO_WMA;
610 if (memcmp(map, top_level_header_object_guid, 16))
611 return -E_NO_WMA;
612 p += 16;
613 top.size = read_u64(p);
614 PARA_INFO_LOG("header_size: %lu\n", (long unsigned)top.size);
615 if (top.size >= mapsize)
616 return -E_NO_WMA;
617 p += 8;
618 top.num_objects = read_u32(p);
619 PARA_NOTICE_LOG("%u header objects\n", top.num_objects);
620 if (top.num_objects > top.size / 24)
621 return -E_NO_WMA;
622 p += 4;
623 top.reserved1 = read_u8(p);
624 p++;
625 top.reserved2 = read_u8(p);
626 if (top.reserved2 != 2)
627 return -E_NO_WMA;
628 p++; /* objects start at p */
629 top.objects = para_malloc(top.num_objects * sizeof(struct asf_object));
630 ret = read_asf_objects(p, top.size - (p - map), top.num_objects,
631 top.objects, &ton);
632 if (ret < 0)
633 goto out;
634 ret = make_cdo(tags, ton.content_descr_obj_num >= 0?
635 top.objects + ton.content_descr_obj_num : NULL, &cdo);
636 if (ret < 0)
637 goto out;
638 ret = make_ecdo(tags, &ecdo);
639 if (ret < 0)
640 goto out;
641 ret = write_output_file(fd, map, mapsize, &top, &ton, &cdo,
642 &ecdo);
643 out:
644 free(cdo.ptr);
645 free(ecdo.ptr);
646 free(top.objects);
647 return ret;
648 }
649
650 static const char * const wma_suffixes[] = {"wma", NULL};
651
652 /**
653 * The init function of the wma audio format handler.
654 *
655 * \param afh Pointer to the struct to initialize.
656 */
657 void wma_afh_init(struct audio_format_handler *afh)
658 {
659 afh->get_file_info = wma_get_file_info;
660 afh->suffixes = wma_suffixes;
661 afh->rewrite_tags = wma_rewrite_tags;
662 }