NEWS.md: Add introductory text for v0.5.7.
[paraslash.git] / wma_afh.c
1 /*
2 * Copyright (C) 2009 Andre Noll <maan@tuebingen.mpg.de>
3 *
4 * Licensed under the GPL v2. For licencing details see COPYING.
5 */
6
7 /** \file wma_afh.c The audio format handler for WMA files. */
8
9 #include <sys/types.h>
10 #include <regex.h>
11 #include <iconv.h>
12
13 #include "para.h"
14 #include "error.h"
15 #include "afh.h"
16 #include "portable_io.h"
17 #include "string.h"
18 #include "wma.h"
19 #include "fd.h"
20
/*
 * Iterate over the fixed-size packets of a buffer.
 *
 * _pkt is pointed at _base first and advanced by _len bytes after each
 * iteration. The loop body runs only while the end of the current packet is
 * strictly below _base + _total, so a packet which ends exactly at the end of
 * the buffer is not visited.
 */
#define FOR_EACH_FRAME(_pkt, _base, _total, _len) \
	for (_pkt = (_base); _pkt + (_len) < (_base) + (_total); _pkt += (_len))
24
25 /*
26 * Must be called on a frame boundary, e.g. start + header_len.
27 * \return Frame count, superframe count via *num_superframes.
28 */
29 static int count_frames(const char *buf, int buf_size, uint32_t packet_size,
30 int *num_superframes)
31 {
32 int fc = 0, sfc = 0; /* frame count, superframe count */
33 const uint8_t *p;
34
35
36 FOR_EACH_FRAME(p, (uint8_t *)buf, buf_size, packet_size) {
37 fc += p[WMA_FRAME_SKIP] & 0x0f;
38 sfc++;
39 }
40 PARA_INFO_LOG("%d frames, %d superframes\n", fc, sfc);
41 if (num_superframes)
42 *num_superframes = sfc;
43 return fc;
44 }
45
46 /*
47 * put_utf8() and get_str16() below are based on macros in libavutil/common.h
48 * of the mplayer source code, copyright (c) 2006 Michael Niedermayer
49 * <michaelni@gmx.at>.
50 */
51
/*
 * Encode a 32-bit Unicode character as UTF-8.
 *
 * Values below 0x80 are stored unchanged as a single byte. For all other
 * values the number of output bytes is derived from wma_log2() of the value:
 * up to 4 bytes for values in the valid UTF-8 range and up to 7 bytes in the
 * general case.
 *
 * \param val The character to convert.
 * \param result Where the converted UTF-8 bytes are written.
 *
 * \return The number of bytes written to result.
 */
static int put_utf8(uint32_t val, char *result)
{
	char *dst = result;
	int num_bytes, bits;

	if (val < 0x80) {
		*dst = val;
		return 1;
	}
	num_bytes = (wma_log2(val) + 4) / 5;
	bits = (num_bytes - 1) * 6;
	/* lead byte: num_bytes leading one-bits, then the topmost payload bits */
	*dst++ = (256 - (256 >> num_bytes)) | (val >> bits);
	/* continuation bytes carry six payload bits each */
	for (; bits >= 6; dst++) {
		bits -= 6;
		*dst = 0x80 | ((val >> bits) & 0x3f);
	}
	return dst - result;
}
80
/*
 * Convert a buffer of 16-bit characters to a freshly allocated UTF-8 string.
 *
 * At most len / 2 characters are fetched with read_u16() and encoded with
 * put_utf8(). Conversion stops early after a zero character was converted.
 * The output buffer is grown with para_realloc() such that there is always
 * room for one more encoded character plus the terminating NUL byte.
 *
 * \return The NUL-terminated result, or NULL if no character was converted
 * because len / 2 is zero.
 */
static char *get_str16(const char *in, int len)
{
	char *utf8 = NULL;
	int capacity = 0, used = 0, remaining;

	for (remaining = len / 2; remaining != 0; remaining--, in += 2) {
		uint32_t c;

		if (used + 7 + 1 >= capacity) {
			capacity = 2 * capacity + 50;
			utf8 = para_realloc(utf8, capacity);
		}
		c = read_u16(in);
		used += put_utf8(c, utf8 + used);
		if (c == 0) /* the terminator was just written by put_utf8() */
			return utf8;
	}
	if (utf8)
		utf8[used] = '\0';
	return utf8;
}
104
/* 16 byte identifier found at the start of the content description object. */
static const char content_description_header[] = {
	0x33, 0x26, 0xb2, 0x75,
	0x8e, 0x66, 0xcf, 0x11,
	0xa6, 0xd9, 0x00, 0xaa,
	0x00, 0x62, 0xce, 0x6c
};
109
/*
 * 16 byte identifier found at the start of the extended content description
 * object.
 */
static const char extended_content_header[] = {
	0x40, 0xa4, 0xd0, 0xd2,
	0x07, 0xe3, 0xd2, 0x11,
	0x97, 0xf0, 0x00, 0xa0,
	0xc9, 0x5e, 0xa8, 0x50
};
114
/* The tag name "WM/Year" as 16-bit little endian characters, unterminated. */
static const char year_tag_header[] = {
	0x57, 0x00, /* W */
	0x4d, 0x00, /* M */
	0x2f, 0x00, /* / */
	0x59, 0x00, /* Y */
	0x65, 0x00, /* e */
	0x61, 0x00, /* a */
	0x72, 0x00  /* r */
};
119
/*
 * The tag name "WM/AlbumTitle" as 16-bit little endian characters,
 * unterminated.
 */
static const char album_tag_header[] = {
	0x57, 0x00, /* W */
	0x4d, 0x00, /* M */
	0x2f, 0x00, /* / */
	0x41, 0x00, /* A */
	0x6c, 0x00, /* l */
	0x62, 0x00, /* b */
	0x75, 0x00, /* u */
	0x6d, 0x00, /* m */
	0x54, 0x00, /* T */
	0x69, 0x00, /* i */
	0x74, 0x00, /* t */
	0x6c, 0x00, /* l */
	0x65, 0x00  /* e */
};
126
127 static void read_asf_tags(const char *buf, int buf_size, struct taginfo *ti)
128 {
129 const char *p, *end = buf + buf_size, *q;
130 uint16_t len1, len2, len3, len4;
131
132 p = search_pattern(content_description_header,
133 sizeof(content_description_header), buf, buf_size);
134 if (!p || p + 34 >= end) {
135 PARA_NOTICE_LOG("content description header not found\n");
136 goto next;
137 }
138 p += 24;
139 len1 = read_u16(p);
140 p += 2;
141 len2 = read_u16(p);
142 p += 2;
143 len3 = read_u16(p);
144 p += 2;
145 len4 = read_u16(p);
146 p += 2;
147 /* ignore length of the rating information */
148 p += 2;
149 if (p + len1 >= end)
150 goto next;
151 ti->title = get_str16(p, len1);
152 p += len1;
153 if (p + len2 >= end)
154 goto next;
155 ti->artist = get_str16(p, len2);
156 p += len2 + len3;
157 if (p + len4 >= end)
158 goto next;
159 ti->comment = get_str16(p, len4);
160 next:
161 p = search_pattern(extended_content_header, sizeof(extended_content_header),
162 buf, buf_size);
163 if (!p) {
164 PARA_NOTICE_LOG("extended content header not found\n");
165 return;
166 }
167 q = search_pattern(year_tag_header, sizeof(year_tag_header),
168 p, end - p);
169 if (q) {
170 const char *r = q + sizeof(year_tag_header) + 6;
171 if (r < end)
172 ti->year = get_str16(r, end - r);
173 }
174 q = search_pattern(album_tag_header, sizeof(album_tag_header),
175 p, end - p);
176 if (q) {
177 const char *r = q + sizeof(album_tag_header) + 6;
178 if (r < end)
179 ti->album = get_str16(r, end - r);
180 }
181 }
182
/*
 * Compute the duration of one chunk.
 *
 * The duration in microseconds is frames_per_chunk * 2048 * 10^6 / frequency,
 * computed in 64-bit arithmetic. The factor 2048 is presumably the number of
 * samples per frame (it is marked FIXME elsewhere in this file).
 *
 * \param result Receives the duration, split into seconds and microseconds.
 */
static void set_chunk_tv(int frames_per_chunk, int frequency,
		struct timeval *result)
{
	const uint64_t usec_per_sec = 1000 * 1000;
	uint64_t chunk_usec = (uint64_t)frames_per_chunk * 2048 * usec_per_sec;

	chunk_usec /= frequency;
	result->tv_usec = chunk_usec % usec_per_sec;
	result->tv_sec = chunk_usec / usec_per_sec;
	PARA_INFO_LOG("chunk time: %lums\n", tv2ms(result));
}
193
194 /* Must be called on a frame boundary. */
195 static int wma_make_chunk_table(char *buf, size_t buf_size, uint32_t packet_size,
196 struct afh_info *afhi)
197 {
198 const uint8_t *f, *start = (uint8_t *)buf;
199 int j, frames_per_chunk;
200 size_t ct_size = 250;
201 int ret, count = 0, num_frames, num_superframes;
202
203 afhi->chunk_table = para_malloc(ct_size * sizeof(uint32_t));
204 afhi->chunk_table[0] = 0;
205 afhi->chunk_table[1] = afhi->header_len;
206
207 num_frames = count_frames(buf, buf_size, packet_size,
208 &num_superframes);
209 ret = -E_NO_WMA;
210 if (num_frames == 0 || num_superframes == 0)
211 goto fail;
212 afhi->seconds_total = num_frames * 2048 /* FIXME */
213 / afhi->frequency;
214 frames_per_chunk = num_frames / num_superframes / 2;
215 PARA_INFO_LOG("%d frames per chunk\n", frames_per_chunk);
216 j = 1;
217 FOR_EACH_FRAME(f, start, buf_size, packet_size) {
218 count += f[WMA_FRAME_SKIP] & 0x0f;
219 while (count > j * frames_per_chunk) {
220 j++;
221 if (j >= ct_size) {
222 ct_size *= 2;
223 afhi->chunk_table = para_realloc(
224 afhi->chunk_table,
225 ct_size * sizeof(uint32_t));
226 }
227 afhi->chunk_table[j] = f - start + afhi->header_len
228 + packet_size;
229 }
230 }
231 afhi->chunks_total = j;
232 set_chunk_tv(frames_per_chunk, afhi->frequency, &afhi->chunk_tv);
233 return 1;
234 fail:
235 free(afhi->chunk_table);
236 return ret;
237 }
238
239 static int wma_get_file_info(char *map, size_t numbytes, __a_unused int fd,
240 struct afh_info *afhi)
241 {
242 int ret;
243 struct asf_header_info ahi;
244
245 ret = read_asf_header(map, numbytes, &ahi);
246 if (ret < 0)
247 return ret;
248 if (ret == 0)
249 return -E_NO_WMA;
250 afhi->bitrate = ahi.bit_rate / 1000;
251 if (ahi.sample_rate == 0)
252 return -E_NO_WMA;
253 afhi->frequency = ahi.sample_rate;
254 afhi->channels = ahi.channels;
255 afhi->header_len = ahi.header_len;
256
257 afhi->techinfo = make_message("%s%s%s%s%s",
258 ahi.use_exp_vlc? "exp vlc" : "",
259 (ahi.use_bit_reservoir && ahi.use_exp_vlc)? ", " : "",
260 ahi.use_bit_reservoir? "bit reservoir" : "",
261 (ahi.use_variable_block_len &&
262 (ahi.use_exp_vlc || ahi.use_bit_reservoir)? ", " : ""),
263 ahi.use_variable_block_len? "vbl" : ""
264 );
265 wma_make_chunk_table(map + ahi.header_len, numbytes - ahi.header_len,
266 ahi.packet_size, afhi);
267 read_asf_tags(map, ahi.header_len, &afhi->tags);
268 return 0;
269 }
270
/* Location and extent of a single object of the ASF header. */
struct asf_object {
	/* Start of the object, i.e. the first byte of its 16 byte identifier. */
	char *ptr;
	/* Total number of bytes of the object. */
	uint64_t size;
};
275
/*
 * Indices of the two tag-carrying objects within the array of header
 * objects. Callers in this file initialize both to -1, meaning "not found".
 */
struct tag_object_nums {
	/* Index of the content description object. */
	int content_descr_obj_num;
	/* Index of the extended content description object. */
	int extended_content_descr_obj_num;
};
280
/* The decoded fields of the top-level header object of an ASF file. */
struct afs_top_level_header_object {
	/* Size of the complete header as stored in the file. */
	uint64_t size;
	/* Number of objects contained in the header. */
	uint32_t num_objects;
	/* The two reserved bytes which follow the object count. */
	uint8_t reserved1;
	uint8_t reserved2;
	/* Array of num_objects elements. */
	struct asf_object *objects;
};
287
/*
 * Evaluates to one if the memory at _data starts with the bytes of the array
 * _hdr, zero otherwise. _hdr must be an array since its size is taken with
 * sizeof.
 */
#define CHECK_HEADER(_data, _hdr) (!memcmp((_data), (_hdr), sizeof((_hdr))))
289
290 static int read_asf_objects(const char *src, size_t size, uint32_t num_objects,
291 struct asf_object *objs, struct tag_object_nums *ton)
292 {
293 int i;
294 const char *p;
295
296 for (i = 0, p = src; i < num_objects; p += objs[i++].size) {
297 if (p + 24 > src + size)
298 return -E_NO_WMA;
299 objs[i].ptr = (char *)p;
300 objs[i].size = read_u64(p + 16);
301 if (p + objs[i].size > src + size)
302 return -E_NO_WMA;
303
304 if (CHECK_HEADER(p, content_description_header))
305 ton->content_descr_obj_num = i;
306 else if (CHECK_HEADER(p, extended_content_header))
307 ton->extended_content_descr_obj_num = i;
308 }
309 return 1;
310 }
311
/* The 16 byte identifier every file handled here must start with. */
static const char top_level_header_object_guid[] = {
	0x30, 0x26, 0xb2, 0x75,
	0x8e, 0x66, 0xcf, 0x11,
	0xa6, 0xd9, 0x00, 0xaa,
	0x00, 0x62, 0xce, 0x6c
};
316
317 static int convert_utf8_to_utf16(char *src, char **dst)
318 {
319 /*
320 * Without specifying LE (little endian), iconv includes a byte order
321 * mark (e.g. 0xFFFE) at the beginning.
322 */
323 iconv_t cd = iconv_open("UTF-16LE", "UTF-8");
324 size_t sz, inbytes, outbytes, inbytesleft, outbytesleft;
325 char *inbuf, *outbuf;
326 int ret;
327
328 if (!src || !*src) {
329 *dst = para_calloc(2);
330 ret = 0;
331 goto out;
332 }
333 if (cd == (iconv_t) -1)
334 return -ERRNO_TO_PARA_ERROR(errno);
335 inbuf = src;
336 /* even though src is in UTF-8, strlen() should DTRT */
337 inbytes = inbytesleft = strlen(src);
338 outbytes = outbytesleft = 4 * inbytes + 2; /* hope that's enough */
339 *dst = outbuf = para_malloc(outbytes);
340 sz = iconv(cd, ICONV_CAST &inbuf, &inbytesleft, &outbuf, &outbytesleft);
341 if (sz == (size_t)-1) {
342 ret = -ERRNO_TO_PARA_ERROR(errno);
343 goto out;
344 }
345 assert(outbytes >= outbytesleft);
346 assert(outbytes - outbytesleft < INT_MAX - 2);
347 ret = outbytes - outbytesleft;
348 outbuf = para_realloc(*dst, ret + 2);
349 outbuf[ret] = outbuf[ret + 1] = '\0';
350 ret += 2;
351 *dst = outbuf;
352 PARA_INFO_LOG("converted %s to %d UTF-16 bytes\n", src, ret);
353 out:
354 if (ret < 0)
355 free(*dst);
356 if (iconv_close(cd) < 0)
357 PARA_WARNING_LOG("iconv_close: %s\n", strerror(errno));
358 return ret;
359 }
360
361 /* The content description object contains artist, title, comment. */
362 static int make_cdo(struct taginfo *tags, const struct asf_object *cdo,
363 struct asf_object *result)
364 {
365 const char *cr, *rating; /* orig data */
366 uint16_t orig_title_bytes, orig_artist_bytes, orig_cr_bytes,
367 orig_comment_bytes, orig_rating_bytes;
368 /* pointers to new UTF-16 tags */
369 char *artist = NULL, *title = NULL, *comment = NULL;
370 /* number of bytes in UTF-16 for the new tags */
371 int artist_bytes, title_bytes, comment_bytes, ret;
372 char *p, null[2] = "\0\0";
373
374 result->ptr = NULL;
375 result->size = 0;
376 ret = convert_utf8_to_utf16(tags->artist, &artist);
377 if (ret < 0)
378 return ret;
379 artist_bytes = ret;
380 ret = convert_utf8_to_utf16(tags->title, &title);
381 if (ret < 0)
382 goto out;
383 title_bytes = ret;
384 ret = convert_utf8_to_utf16(tags->comment, &comment);
385 if (ret < 0)
386 goto out;
387 comment_bytes = ret;
388
389 if (cdo) {
390 /*
391 * Sizes of the five fields (stored as 16-bit numbers) are
392 * located after the header (16 bytes) and the cdo size (8
393 * bytes).
394 */
395 orig_title_bytes = read_u16(cdo->ptr + 24);
396 orig_artist_bytes = read_u16(cdo->ptr + 26);
397 orig_cr_bytes = read_u16(cdo->ptr + 28);
398 orig_comment_bytes = read_u16(cdo->ptr + 30);
399 orig_rating_bytes = read_u16(cdo->ptr + 32);
400 cr = cdo->ptr + 34 + orig_title_bytes + orig_artist_bytes;
401 rating = cr + orig_cr_bytes + orig_comment_bytes;
402 } else {
403 orig_title_bytes = 2;
404 orig_artist_bytes = 2;
405 orig_cr_bytes = 2;
406 orig_comment_bytes = 2;
407 orig_rating_bytes = 2;
408 cr = null;
409 rating = null;
410 }
411
412 /* compute size of result cdo */
413 result->size = 16 + 8 + 5 * 2 + title_bytes + artist_bytes
414 + orig_cr_bytes + comment_bytes + orig_rating_bytes;
415 PARA_DEBUG_LOG("cdo is %zu bytes\n", (size_t)result->size);
416 p = result->ptr = para_malloc(result->size);
417 memcpy(p, content_description_header, 16);
418 p += 16;
419 write_u64(p, result->size);
420 p += 8;
421 write_u16(p, title_bytes);
422 p += 2;
423 write_u16(p, artist_bytes);
424 p += 2;
425 write_u16(p, orig_cr_bytes);
426 p += 2;
427 write_u16(p, comment_bytes);
428 p += 2;
429 write_u16(p, orig_rating_bytes);
430 p += 2;
431 memcpy(p, title, title_bytes);
432 p += title_bytes;
433 memcpy(p, artist, artist_bytes);
434 p += artist_bytes;
435 memcpy(p, cr, orig_cr_bytes);
436 p += orig_cr_bytes;
437 memcpy(p, comment, comment_bytes);
438 p += comment_bytes;
439 memcpy(p, rating, orig_rating_bytes);
440 p += orig_rating_bytes;
441 assert(p - result->ptr == result->size);
442 ret = 1;
443 out:
444 free(artist);
445 free(title);
446 free(comment);
447 return ret;
448 }
449
450 /* The extended content description object contains album and year. */
451 static int make_ecdo(struct taginfo *tags, struct asf_object *result)
452 {
453 int ret;
454 char *p, *album = NULL, *year = NULL, null[2] = "\0\0";
455 int album_bytes, year_bytes;
456
457 result->ptr = NULL;
458 result->size = 0;
459 ret = convert_utf8_to_utf16(tags->album, &album);
460 if (ret < 0)
461 return ret;
462 album_bytes = ret;
463 ret = convert_utf8_to_utf16(tags->year, &year);
464 if (ret < 0)
465 goto out;
466 year_bytes = ret;
467 result->size = 16 + 8 + 2; /* GUID, size, count */
468 /* name_length + name + null + data type + val length + val */
469 result->size += 2 + sizeof(album_tag_header) + 2 + 2 + 2 + album_bytes;
470 result->size += 2 + sizeof(year_tag_header) + 2 + 2 + 2 + year_bytes;
471
472 p = result->ptr = para_malloc(result->size);
473 memcpy(p, extended_content_header, 16);
474 p += 16;
475 write_u64(p, result->size);
476 p += 8;
477 write_u16(p, 2); /* count */
478 p += 2;
479
480 /* album */
481 write_u16(p, sizeof(album_tag_header) + 2);
482 p += 2;
483 memcpy(p, album_tag_header, sizeof(album_tag_header));
484 p += sizeof(album_tag_header);
485 memcpy(p, null, 2);
486 p += 2;
487 write_u16(p, 0); /* data type (UTF-16) */
488 p += 2;
489 write_u16(p, album_bytes);
490 p += 2;
491 memcpy(p, album, album_bytes);
492 p += album_bytes;
493
494 /* year */
495 write_u16(p, sizeof(year_tag_header));
496 p += 2;
497 memcpy(p, year_tag_header, sizeof(year_tag_header));
498 p += sizeof(year_tag_header);
499 memcpy(p, null, 2);
500 p += 2;
501 write_u16(p, 0); /* data type (UTF-16) */
502 p += 2;
503 write_u16(p, year_bytes);
504 p += 2;
505 memcpy(p, year, year_bytes);
506 p += year_bytes;
507 assert(p - result->ptr == result->size);
508 ret = 1;
509 out:
510 free(album);
511 free(year);
512 return ret;
513 }
514
515 static int write_output_file(int fd, const char *map, size_t mapsize,
516 struct afs_top_level_header_object *top, struct tag_object_nums *ton,
517 struct asf_object *cdo, struct asf_object *ecdo)
518 {
519 int i, ret;
520 uint64_t sz; /* of the new header object */
521 uint32_t num_objects;
522 char tmp[8];
523
524 sz = 16 + 8 + 4 + 1 + 1; /* top-level header object */
525 for (i = 0; i < top->num_objects; i++) {
526 if (i == ton->content_descr_obj_num)
527 continue;
528 if (i == ton->extended_content_descr_obj_num)
529 continue;
530 sz += top->objects[i].size;
531 }
532 sz += cdo->size;
533 sz += ecdo->size;
534 num_objects = top->num_objects;
535 if (ton->content_descr_obj_num < 0)
536 num_objects++;
537 if (ton->extended_content_descr_obj_num < 0)
538 num_objects++;
539 ret = xwrite(fd, top_level_header_object_guid, 16);
540 if (ret < 0)
541 goto out;
542 write_u64(tmp, sz);
543 ret = xwrite(fd, tmp, 8);
544 if (ret < 0)
545 goto out;
546 write_u32(tmp, num_objects);
547 ret = xwrite(fd, tmp, 4);
548 if (ret < 0)
549 goto out;
550 write_u8(tmp, top->reserved1);
551 ret = xwrite(fd, tmp, 1);
552 if (ret < 0)
553 goto out;
554 write_u8(tmp, top->reserved2);
555 ret = xwrite(fd, tmp, 1);
556 if (ret < 0)
557 goto out;
558 /*
559 * Write cto and ecto as objects 0 and 1 if they did not exist in the
560 * original file.
561 */
562 if (ton->content_descr_obj_num < 0) {
563 ret = xwrite(fd, cdo->ptr, cdo->size);
564 if (ret < 0)
565 goto out;
566 }
567 if (ton->extended_content_descr_obj_num < 0) {
568 ret = xwrite(fd, ecdo->ptr, ecdo->size);
569 if (ret < 0)
570 goto out;
571 }
572
573 for (i = 0; i < top->num_objects; i++) {
574 char *buf = top->objects[i].ptr;
575 sz = top->objects[i].size;
576 if (i == ton->content_descr_obj_num) {
577 buf = cdo->ptr;
578 sz = cdo->size;
579 } else if (i == ton->extended_content_descr_obj_num) {
580 buf = ecdo->ptr;
581 sz = ecdo->size;
582 }
583 ret = xwrite(fd, buf, sz);
584 if (ret < 0)
585 goto out;
586 }
587 ret = xwrite(fd, map + top->size, mapsize - top->size);
588 out:
589 return ret;
590 }
591
592 static int wma_rewrite_tags(const char *map, size_t mapsize,
593 struct taginfo *tags, int fd,
594 __a_unused const char *filename)
595 {
596 struct afs_top_level_header_object top;
597 struct tag_object_nums ton = {-1, -1};
598 const char *p = map;
599 /* (extended) content description object */
600 struct asf_object cdo = {.ptr = NULL}, ecdo = {.ptr = NULL};
601 int ret;
602
603 /* guid + size + num_objects + 2 * reserved */
604 if (mapsize < 16 + 8 + 4 + 1 + 1)
605 return -E_NO_WMA;
606 if (memcmp(map, top_level_header_object_guid, 16))
607 return -E_NO_WMA;
608 p += 16;
609 top.size = read_u64(p);
610 PARA_INFO_LOG("header_size: %lu\n", (long unsigned)top.size);
611 if (top.size >= mapsize)
612 return -E_NO_WMA;
613 p += 8;
614 top.num_objects = read_u32(p);
615 PARA_NOTICE_LOG("%u header objects\n", top.num_objects);
616 if (top.num_objects > top.size / 24)
617 return -E_NO_WMA;
618 p += 4;
619 top.reserved1 = read_u8(p);
620 p++;
621 top.reserved2 = read_u8(p);
622 if (top.reserved2 != 2)
623 return -E_NO_WMA;
624 p++; /* objects start at p */
625 top.objects = para_malloc(top.num_objects * sizeof(struct asf_object));
626 ret = read_asf_objects(p, top.size - (p - map), top.num_objects,
627 top.objects, &ton);
628 if (ret < 0)
629 goto out;
630 ret = make_cdo(tags, ton.content_descr_obj_num >= 0?
631 top.objects + ton.content_descr_obj_num : NULL, &cdo);
632 if (ret < 0)
633 goto out;
634 ret = make_ecdo(tags, &ecdo);
635 if (ret < 0)
636 goto out;
637 ret = write_output_file(fd, map, mapsize, &top, &ton, &cdo,
638 &ecdo);
639 out:
640 free(cdo.ptr);
641 free(ecdo.ptr);
642 free(top.objects);
643 return ret;
644 }
645
/* NULL-terminated list of the file name suffixes of this audio format. */
static const char * const wma_suffixes[] = {
	"wma",
	NULL
};
647
648 /**
649 * The init function of the wma audio format handler.
650 *
651 * \param afh Pointer to the struct to initialize.
652 */
653 void wma_afh_init(struct audio_format_handler *afh)
654 {
655 afh->get_file_info = wma_get_file_info;
656 afh->suffixes = wma_suffixes;
657 afh->rewrite_tags = wma_rewrite_tags;
658 }