Fix documentation of check_filter_arg().
[paraslash.git] / wma_afh.c
1 /*
2 * Copyright (C) 2009 Andre Noll <maan@tuebingen.mpg.de>
3 *
4 * Licensed under the GPL v2. For licencing details see COPYING.
5 */
6
7 /** \file wma_afh.c The audio format handler for WMA files. */
8
9 #include <sys/types.h>
10 #include <regex.h>
11 #include <iconv.h>
12
13 #include "para.h"
14 #include "error.h"
15 #include "afh.h"
16 #include "portable_io.h"
17 #include "string.h"
18 #include "wma.h"
19 #include "fd.h"
20
/*
 * Step through a buffer in units of (_align + WMA_FRAME_SKIP) bytes.
 *
 * _pos is set to _start and advanced by one unit per iteration. The loop
 * ends as soon as a full unit starting at _pos would reach or exceed the end
 * of the buffer, i.e. _start + _len.
 */
#define FOR_EACH_FRAME(_pos, _start, _len, _align) \
	for (_pos = (_start); \
		_pos + (_align) + WMA_FRAME_SKIP < (_start) + (_len); \
		_pos += (_align) + WMA_FRAME_SKIP)
24
25 /*
26 * Must be called on a frame boundary, e.g. start + header_len.
27 * \return Frame count, superframe count via *num_superframes.
28 */
29 static int count_frames(const char *buf, int buf_size, int block_align,
30 int *num_superframes)
31 {
32 int fc = 0, sfc = 0; /* frame count, superframe count */
33 const uint8_t *p;
34
35
36 FOR_EACH_FRAME(p, (uint8_t *)buf, buf_size, block_align) {
37 fc += p[WMA_FRAME_SKIP] & 0x0f;
38 sfc++;
39 }
40 PARA_INFO_LOG("%d frames, %d superframes\n", fc, sfc);
41 if (num_superframes)
42 *num_superframes = sfc;
43 return fc;
44 }
45
46 /*
47 * put_utf8() and get_str16() below are based on macros in libavutil/common.h
48 * of the mplayer source code, copyright (c) 2006 Michael Niedermayer
49 * <michaelni@gmx.at>.
50 */
51
/*
 * Convert a 32-bit Unicode character to its UTF-8 encoded form.
 *
 * Writes up to 4 bytes for values in the valid UTF-8 range and up to 7 bytes
 * in the general case, depending on the length of the converted Unicode
 * character. No terminating null byte is appended.
 *
 * \param val The character to convert.
 * \param result Where the converted UTF-8 bytes are written.
 *
 * \return The number of bytes written to \a result.
 */
static int put_utf8(uint32_t val, char *result)
{
	int n = 0, num_bytes, bits;

	/* Values below 0x80 are stored verbatim as a single byte. */
	if (val < 0x80) {
		result[0] = val;
		return 1;
	}
	num_bytes = (wma_log2(val) + 4) / 5;
	bits = (num_bytes - 1) * 6;
	/* Lead byte: num_bytes one-bits followed by the topmost payload bits. */
	result[n++] = (256 - (256 >> num_bytes)) | (val >> bits);
	/* Continuation bytes carry six payload bits each. */
	for (bits -= 6; bits >= 0; bits -= 6)
		result[n++] = 0x80 | ((val >> bits) & 0x3f);
	return n;
}
80
/*
 * Convert a buffer of 16-bit characters to an allocated UTF-8 string.
 *
 * Conversion stops at the first zero character or after len / 2 characters,
 * whichever comes first. In both cases the result is null-terminated.
 *
 * \param in The buffer containing the 16-bit characters.
 * \param len Size of the buffer in bytes.
 *
 * \return The converted string, or NULL if the buffer holds no complete
 * character. The caller is responsible for freeing the result.
 */
static char *get_str16(const char *in, int len)
{
	int num_chars = len / 2, capacity = 0, used = 0;
	const char *src = in;
	char *utf8 = NULL;

	for (; num_chars != 0; num_chars--, src += 2) {
		uint32_t c;

		/* Make room for one converted character plus the terminator. */
		if (used + 7 + 1 >= capacity) {
			capacity = 2 * capacity + 50;
			utf8 = para_realloc(utf8, capacity);
		}
		c = read_u16(src);
		used += put_utf8(c, utf8 + used);
		if (c == 0) /* the terminator has just been written */
			return utf8;
	}
	if (utf8)
		utf8[used] = '\0';
	return utf8;
}
104
/* The 16 bytes which identify the content description object. */
static const char content_description_header[] = {
	0x33, 0x26, 0xb2, 0x75,
	0x8e, 0x66, 0xcf, 0x11,
	0xa6, 0xd9, 0x00, 0xaa,
	0x00, 0x62, 0xce, 0x6c
};

/* The 16 bytes which identify the extended content description object. */
static const char extended_content_header[] = {
	0x40, 0xa4, 0xd0, 0xd2,
	0x07, 0xe3, 0xd2, 0x11,
	0x97, 0xf0, 0x00, 0xa0,
	0xc9, 0x5e, 0xa8, 0x50
};

/* "WM/Year" as 16-bit characters, without terminator. */
static const char year_tag_header[] = {
	0x57, 0x00, /* W */
	0x4d, 0x00, /* M */
	0x2f, 0x00, /* / */
	0x59, 0x00, /* Y */
	0x65, 0x00, /* e */
	0x61, 0x00, /* a */
	0x72, 0x00  /* r */
};

/* "WM/AlbumTitle" as 16-bit characters, without terminator. */
static const char album_tag_header[] = {
	0x57, 0x00, /* W */
	0x4d, 0x00, /* M */
	0x2f, 0x00, /* / */
	0x41, 0x00, /* A */
	0x6c, 0x00, /* l */
	0x62, 0x00, /* b */
	0x75, 0x00, /* u */
	0x6d, 0x00, /* m */
	0x54, 0x00, /* T */
	0x69, 0x00, /* i */
	0x74, 0x00, /* t */
	0x6c, 0x00, /* l */
	0x65, 0x00  /* e */
};
126
127 static void read_asf_tags(const char *buf, int buf_size, struct taginfo *ti)
128 {
129 const char *p, *end = buf + buf_size, *q;
130 uint16_t len1, len2, len3, len4;
131
132 p = search_pattern(content_description_header,
133 sizeof(content_description_header), buf, buf_size);
134 if (!p || p + 34 >= end) {
135 PARA_NOTICE_LOG("content description header not found\n");
136 goto next;
137 }
138 p += 24;
139 len1 = read_u16(p);
140 p += 2;
141 len2 = read_u16(p);
142 p += 2;
143 len3 = read_u16(p);
144 p += 2;
145 len4 = read_u16(p);
146 p += 2;
147 /* ignore length of the rating information */
148 p += 2;
149 if (p + len1 >= end)
150 goto next;
151 ti->title = get_str16(p, len1);
152 p += len1;
153 if (p + len2 >= end)
154 goto next;
155 ti->artist = get_str16(p, len2);
156 p += len2 + len3;
157 if (p + len4 >= end)
158 goto next;
159 ti->comment = get_str16(p, len4);
160 next:
161 p = search_pattern(extended_content_header, sizeof(extended_content_header),
162 buf, buf_size);
163 if (!p) {
164 PARA_NOTICE_LOG("extended content header not found\n");
165 return;
166 }
167 q = search_pattern(year_tag_header, sizeof(year_tag_header),
168 p, end - p);
169 if (q) {
170 const char *r = q + sizeof(year_tag_header) + 6;
171 if (r < end)
172 ti->year = get_str16(r, end - r);
173 }
174 q = search_pattern(album_tag_header, sizeof(album_tag_header),
175 p, end - p);
176 if (q) {
177 const char *r = q + sizeof(album_tag_header) + 6;
178 if (r < end)
179 ti->album = get_str16(r, end - r);
180 }
181 }
182
/*
 * Compute the duration of one chunk.
 *
 * The duration is frames_per_chunk * 2048 / frequency seconds, computed with
 * microsecond resolution.
 *
 * \param frames_per_chunk Number of frames which make up one chunk.
 * \param frequency The divisor of the computation; must not be zero.
 * \param result The chunk duration is stored here.
 */
static void set_chunk_tv(int frames_per_chunk, int frequency,
		struct timeval *result)
{
	const uint64_t usec_per_sec = 1000 * 1000;
	uint64_t chunk_usec = (uint64_t)frames_per_chunk * 2048
		* usec_per_sec / frequency;

	result->tv_usec = chunk_usec % usec_per_sec;
	result->tv_sec = chunk_usec / usec_per_sec;
	PARA_INFO_LOG("chunk time: %lums\n", tv2ms(result));
}
193
194 /* Must be called on a frame boundary. */
195 static int wma_make_chunk_table(char *buf, size_t buf_size, int block_align,
196 struct afh_info *afhi)
197 {
198 const uint8_t *f, *start = (uint8_t *)buf;
199 int j, frames_per_chunk;
200 size_t ct_size = 250;
201 int ret, count = 0, num_frames, num_superframes;
202
203 afhi->chunk_table = para_malloc(ct_size * sizeof(uint32_t));
204 afhi->chunk_table[0] = 0;
205 afhi->chunk_table[1] = afhi->header_len;
206
207 num_frames = count_frames(buf, buf_size, block_align,
208 &num_superframes);
209 ret = -E_NO_WMA;
210 if (num_frames == 0 || num_superframes == 0)
211 goto fail;
212 afhi->seconds_total = num_frames * 2048 /* FIXME */
213 / afhi->frequency;
214 frames_per_chunk = num_frames / num_superframes / 2;
215 PARA_INFO_LOG("%d frames per chunk\n", frames_per_chunk);
216 j = 1;
217 FOR_EACH_FRAME(f, start, buf_size, block_align) {
218 count += f[WMA_FRAME_SKIP] & 0x0f;
219 while (count > j * frames_per_chunk) {
220 j++;
221 if (j >= ct_size) {
222 ct_size *= 2;
223 afhi->chunk_table = para_realloc(
224 afhi->chunk_table,
225 ct_size * sizeof(uint32_t));
226 }
227 afhi->chunk_table[j] = f - start + afhi->header_len + block_align + WMA_FRAME_SKIP;
228 }
229 }
230 afhi->chunks_total = j;
231 set_chunk_tv(frames_per_chunk, afhi->frequency, &afhi->chunk_tv);
232 return 1;
233 fail:
234 free(afhi->chunk_table);
235 return ret;
236 }
237
238 static int wma_get_file_info(char *map, size_t numbytes, __a_unused int fd,
239 struct afh_info *afhi)
240 {
241 int ret;
242 struct asf_header_info ahi;
243
244 ret = read_asf_header(map, numbytes, &ahi);
245 if (ret < 0)
246 return ret;
247 if (ret == 0)
248 return -E_NO_WMA;
249 afhi->bitrate = ahi.bit_rate / 1000;
250 if (ahi.sample_rate == 0)
251 return -E_NO_WMA;
252 afhi->frequency = ahi.sample_rate;
253 afhi->channels = ahi.channels;
254 afhi->header_len = ahi.header_len;
255
256 afhi->techinfo = make_message("%s%s%s%s%s",
257 ahi.use_exp_vlc? "exp vlc" : "",
258 (ahi.use_bit_reservoir && ahi.use_exp_vlc)? ", " : "",
259 ahi.use_bit_reservoir? "bit reservoir" : "",
260 (ahi.use_variable_block_len &&
261 (ahi.use_exp_vlc || ahi.use_bit_reservoir)? ", " : ""),
262 ahi.use_variable_block_len? "vbl" : ""
263 );
264 wma_make_chunk_table(map + ahi.header_len, numbytes - ahi.header_len,
265 ahi.block_align, afhi);
266 read_asf_tags(map, ahi.header_len, &afhi->tags);
267 return 0;
268 }
269
/* Location and size of one object of the asf header. */
struct asf_object {
	/* Points to the first byte of the object. */
	char *ptr;
	/* Size of the object in bytes. */
	uint64_t size;
};

/*
 * Positions of the two tag objects in the array of header objects. Negative
 * values indicate that the object does not exist.
 */
struct tag_object_nums {
	/* Index of the content description object. */
	int content_descr_obj_num;
	/* Index of the extended content description object. */
	int extended_content_descr_obj_num;
};

/* The fields of the top-level header object. */
struct afs_top_level_header_object {
	/* Size in bytes, as stored in the file. */
	uint64_t size;
	/* Number of objects contained in the header. */
	uint32_t num_objects;
	/* The two bytes which follow the object count. */
	uint8_t reserved1;
	uint8_t reserved2;
	/* Array of num_objects elements. */
	struct asf_object *objects;
};
286
/*
 * Evaluates to true if the memory at _ptr starts with the bytes of the array
 * _guid. The second argument must be an array, not a pointer, because its
 * size is determined with sizeof.
 */
#define CHECK_HEADER(_ptr, _guid) (!memcmp((_ptr), (_guid), sizeof(_guid)))
288
289 static int read_asf_objects(const char *src, size_t size, uint32_t num_objects,
290 struct asf_object *objs, struct tag_object_nums *ton)
291 {
292 int i;
293 const char *p;
294
295 for (i = 0, p = src; i < num_objects; p += objs[i++].size) {
296 if (p + 24 > src + size)
297 return -E_NO_WMA;
298 objs[i].ptr = (char *)p;
299 objs[i].size = read_u64(p + 16);
300 if (p + objs[i].size > src + size)
301 return -E_NO_WMA;
302
303 if (CHECK_HEADER(p, content_description_header))
304 ton->content_descr_obj_num = i;
305 else if (CHECK_HEADER(p, extended_content_header))
306 ton->extended_content_descr_obj_num = i;
307 }
308 return 1;
309 }
310
/* The 16 bytes at the very beginning of the file. */
static const char top_level_header_object_guid[] = {
	0x30, 0x26, 0xb2, 0x75,
	0x8e, 0x66, 0xcf, 0x11,
	0xa6, 0xd9, 0x00, 0xaa,
	0x00, 0x62, 0xce, 0x6c
};
315
316 static int convert_utf8_to_utf16(char *src, char **dst)
317 {
318 /*
319 * Without specifying LE (little endian), iconv includes a byte order
320 * mark (e.g. 0xFFFE) at the beginning.
321 */
322 iconv_t cd = iconv_open("UTF-16LE", "UTF-8");
323 size_t sz, inbytes, outbytes, inbytesleft, outbytesleft;
324 char *inbuf, *outbuf;
325 int ret;
326
327 if (!src || !*src) {
328 *dst = para_calloc(2);
329 ret = 0;
330 goto out;
331 }
332 if (cd == (iconv_t) -1)
333 return -ERRNO_TO_PARA_ERROR(errno);
334 inbuf = src;
335 /* even though src is in UTF-8, strlen() should DTRT */
336 inbytes = inbytesleft = strlen(src);
337 outbytes = outbytesleft = 4 * inbytes + 2; /* hope that's enough */
338 *dst = outbuf = para_malloc(outbytes);
339 sz = iconv(cd, ICONV_CAST &inbuf, &inbytesleft, &outbuf, &outbytesleft);
340 if (sz == (size_t)-1) {
341 ret = -ERRNO_TO_PARA_ERROR(errno);
342 goto out;
343 }
344 assert(outbytes >= outbytesleft);
345 assert(outbytes - outbytesleft < INT_MAX - 2);
346 ret = outbytes - outbytesleft;
347 outbuf = para_realloc(*dst, ret + 2);
348 outbuf[ret] = outbuf[ret + 1] = '\0';
349 ret += 2;
350 *dst = outbuf;
351 PARA_INFO_LOG("converted %s to %d UTF-16 bytes\n", src, ret);
352 out:
353 if (ret < 0)
354 free(*dst);
355 if (iconv_close(cd) < 0)
356 PARA_WARNING_LOG("iconv_close: %s\n", strerror(errno));
357 return ret;
358 }
359
360 /* The content description object contains artist, title, comment. */
361 static int make_cdo(struct taginfo *tags, const struct asf_object *cdo,
362 struct asf_object *result)
363 {
364 const char *cr, *rating; /* orig data */
365 uint16_t orig_title_bytes, orig_artist_bytes, orig_cr_bytes,
366 orig_comment_bytes, orig_rating_bytes;
367 /* pointers to new UTF-16 tags */
368 char *artist = NULL, *title = NULL, *comment = NULL;
369 /* number of bytes in UTF-16 for the new tags */
370 int artist_bytes, title_bytes, comment_bytes, ret;
371 char *p, null[2] = "\0\0";
372
373 result->ptr = NULL;
374 result->size = 0;
375 ret = convert_utf8_to_utf16(tags->artist, &artist);
376 if (ret < 0)
377 return ret;
378 artist_bytes = ret;
379 ret = convert_utf8_to_utf16(tags->title, &title);
380 if (ret < 0)
381 goto out;
382 title_bytes = ret;
383 ret = convert_utf8_to_utf16(tags->comment, &comment);
384 if (ret < 0)
385 goto out;
386 comment_bytes = ret;
387
388 if (cdo) {
389 /*
390 * Sizes of the five fields (stored as 16-bit numbers) are
391 * located after the header (16 bytes) and the cdo size (8
392 * bytes).
393 */
394 orig_title_bytes = read_u16(cdo->ptr + 24);
395 orig_artist_bytes = read_u16(cdo->ptr + 26);
396 orig_cr_bytes = read_u16(cdo->ptr + 28);
397 orig_comment_bytes = read_u16(cdo->ptr + 30);
398 orig_rating_bytes = read_u16(cdo->ptr + 32);
399 cr = cdo->ptr + 34 + orig_title_bytes + orig_artist_bytes;
400 rating = cr + orig_cr_bytes + orig_comment_bytes;
401 } else {
402 orig_title_bytes = 2;
403 orig_artist_bytes = 2;
404 orig_cr_bytes = 2;
405 orig_comment_bytes = 2;
406 orig_rating_bytes = 2;
407 cr = null;
408 rating = null;
409 }
410
411 /* compute size of result cdo */
412 result->size = 16 + 8 + 5 * 2 + title_bytes + artist_bytes
413 + orig_cr_bytes + comment_bytes + orig_rating_bytes;
414 PARA_DEBUG_LOG("cdo is %zu bytes\n", (size_t)result->size);
415 p = result->ptr = para_malloc(result->size);
416 memcpy(p, content_description_header, 16);
417 p += 16;
418 write_u64(p, result->size);
419 p += 8;
420 write_u16(p, title_bytes);
421 p += 2;
422 write_u16(p, artist_bytes);
423 p += 2;
424 write_u16(p, orig_cr_bytes);
425 p += 2;
426 write_u16(p, comment_bytes);
427 p += 2;
428 write_u16(p, orig_rating_bytes);
429 p += 2;
430 memcpy(p, title, title_bytes);
431 p += title_bytes;
432 memcpy(p, artist, artist_bytes);
433 p += artist_bytes;
434 memcpy(p, cr, orig_cr_bytes);
435 p += orig_cr_bytes;
436 memcpy(p, comment, comment_bytes);
437 p += comment_bytes;
438 memcpy(p, rating, orig_rating_bytes);
439 p += orig_rating_bytes;
440 assert(p - result->ptr == result->size);
441 ret = 1;
442 out:
443 free(artist);
444 free(title);
445 free(comment);
446 return ret;
447 }
448
449 /* The extended content description object contains album and year. */
450 static int make_ecdo(struct taginfo *tags, struct asf_object *result)
451 {
452 int ret;
453 char *p, *album = NULL, *year = NULL, null[2] = "\0\0";
454 int album_bytes, year_bytes;
455
456 result->ptr = NULL;
457 result->size = 0;
458 ret = convert_utf8_to_utf16(tags->album, &album);
459 if (ret < 0)
460 return ret;
461 album_bytes = ret;
462 ret = convert_utf8_to_utf16(tags->year, &year);
463 if (ret < 0)
464 goto out;
465 year_bytes = ret;
466 result->size = 16 + 8 + 2; /* GUID, size, count */
467 /* name_length + name + null + data type + val length + val */
468 result->size += 2 + sizeof(album_tag_header) + 2 + 2 + 2 + album_bytes;
469 result->size += 2 + sizeof(year_tag_header) + 2 + 2 + 2 + year_bytes;
470
471 p = result->ptr = para_malloc(result->size);
472 memcpy(p, extended_content_header, 16);
473 p += 16;
474 write_u64(p, result->size);
475 p += 8;
476 write_u16(p, 2); /* count */
477 p += 2;
478
479 /* album */
480 write_u16(p, sizeof(album_tag_header) + 2);
481 p += 2;
482 memcpy(p, album_tag_header, sizeof(album_tag_header));
483 p += sizeof(album_tag_header);
484 memcpy(p, null, 2);
485 p += 2;
486 write_u16(p, 0); /* data type (UTF-16) */
487 p += 2;
488 write_u16(p, album_bytes);
489 p += 2;
490 memcpy(p, album, album_bytes);
491 p += album_bytes;
492
493 /* year */
494 write_u16(p, sizeof(year_tag_header));
495 p += 2;
496 memcpy(p, year_tag_header, sizeof(year_tag_header));
497 p += sizeof(year_tag_header);
498 memcpy(p, null, 2);
499 p += 2;
500 write_u16(p, 0); /* data type (UTF-16) */
501 p += 2;
502 write_u16(p, year_bytes);
503 p += 2;
504 memcpy(p, year, year_bytes);
505 p += year_bytes;
506 assert(p - result->ptr == result->size);
507 ret = 1;
508 out:
509 free(album);
510 free(year);
511 return ret;
512 }
513
514 static int write_output_file(int fd, const char *map, size_t mapsize,
515 struct afs_top_level_header_object *top, struct tag_object_nums *ton,
516 struct asf_object *cdo, struct asf_object *ecdo)
517 {
518 int i, ret;
519 uint64_t sz; /* of the new header object */
520 uint32_t num_objects;
521 char tmp[8];
522
523 sz = 16 + 8 + 4 + 1 + 1; /* top-level header object */
524 for (i = 0; i < top->num_objects; i++) {
525 if (i == ton->content_descr_obj_num)
526 continue;
527 if (i == ton->extended_content_descr_obj_num)
528 continue;
529 sz += top->objects[i].size;
530 }
531 sz += cdo->size;
532 sz += ecdo->size;
533 num_objects = top->num_objects;
534 if (ton->content_descr_obj_num < 0)
535 num_objects++;
536 if (ton->extended_content_descr_obj_num < 0)
537 num_objects++;
538 ret = xwrite(fd, top_level_header_object_guid, 16);
539 if (ret < 0)
540 goto out;
541 write_u64(tmp, sz);
542 ret = xwrite(fd, tmp, 8);
543 if (ret < 0)
544 goto out;
545 write_u32(tmp, num_objects);
546 ret = xwrite(fd, tmp, 4);
547 if (ret < 0)
548 goto out;
549 write_u8(tmp, top->reserved1);
550 ret = xwrite(fd, tmp, 1);
551 if (ret < 0)
552 goto out;
553 write_u8(tmp, top->reserved2);
554 ret = xwrite(fd, tmp, 1);
555 if (ret < 0)
556 goto out;
557 /*
558 * Write cto and ecto as objects 0 and 1 if they did not exist in the
559 * original file.
560 */
561 if (ton->content_descr_obj_num < 0) {
562 ret = xwrite(fd, cdo->ptr, cdo->size);
563 if (ret < 0)
564 goto out;
565 }
566 if (ton->extended_content_descr_obj_num < 0) {
567 ret = xwrite(fd, ecdo->ptr, ecdo->size);
568 if (ret < 0)
569 goto out;
570 }
571
572 for (i = 0; i < top->num_objects; i++) {
573 char *buf = top->objects[i].ptr;
574 sz = top->objects[i].size;
575 if (i == ton->content_descr_obj_num) {
576 buf = cdo->ptr;
577 sz = cdo->size;
578 } else if (i == ton->extended_content_descr_obj_num) {
579 buf = ecdo->ptr;
580 sz = ecdo->size;
581 }
582 ret = xwrite(fd, buf, sz);
583 if (ret < 0)
584 goto out;
585 }
586 ret = xwrite(fd, map + top->size, mapsize - top->size);
587 out:
588 return ret;
589 }
590
591 static int wma_rewrite_tags(const char *map, size_t mapsize,
592 struct taginfo *tags, int fd,
593 __a_unused const char *filename)
594 {
595 struct afs_top_level_header_object top;
596 struct tag_object_nums ton = {-1, -1};
597 const char *p = map;
598 /* (extended) content description object */
599 struct asf_object cdo = {.ptr = NULL}, ecdo = {.ptr = NULL};
600 int ret;
601
602 /* guid + size + num_objects + 2 * reserved */
603 if (mapsize < 16 + 8 + 4 + 1 + 1)
604 return -E_NO_WMA;
605 if (memcmp(map, top_level_header_object_guid, 16))
606 return -E_NO_WMA;
607 p += 16;
608 top.size = read_u64(p);
609 PARA_INFO_LOG("header_size: %lu\n", (long unsigned)top.size);
610 if (top.size >= mapsize)
611 return -E_NO_WMA;
612 p += 8;
613 top.num_objects = read_u32(p);
614 PARA_NOTICE_LOG("%u header objects\n", top.num_objects);
615 if (top.num_objects > top.size / 24)
616 return -E_NO_WMA;
617 p += 4;
618 top.reserved1 = read_u8(p);
619 p++;
620 top.reserved2 = read_u8(p);
621 if (top.reserved2 != 2)
622 return -E_NO_WMA;
623 p++; /* objects start at p */
624 top.objects = para_malloc(top.num_objects * sizeof(struct asf_object));
625 ret = read_asf_objects(p, top.size - (p - map), top.num_objects,
626 top.objects, &ton);
627 if (ret < 0)
628 goto out;
629 ret = make_cdo(tags, ton.content_descr_obj_num >= 0?
630 top.objects + ton.content_descr_obj_num : NULL, &cdo);
631 if (ret < 0)
632 goto out;
633 ret = make_ecdo(tags, &ecdo);
634 if (ret < 0)
635 goto out;
636 ret = write_output_file(fd, map, mapsize, &top, &ton, &cdo,
637 &ecdo);
638 out:
639 free(cdo.ptr);
640 free(ecdo.ptr);
641 free(top.objects);
642 return ret;
643 }
644
645 static const char * const wma_suffixes[] = {"wma", NULL};
646
647 /**
648 * The init function of the wma audio format handler.
649 *
650 * \param afh Pointer to the struct to initialize.
651 */
652 void wma_afh_init(struct audio_format_handler *afh)
653 {
654 afh->get_file_info = wma_get_file_info;
655 afh->suffixes = wma_suffixes;
656 afh->rewrite_tags = wma_rewrite_tags;
657 }