build: Prefer AC_CHECK_TYPE() over AC_LINK_IFELSE().
[paraslash.git] / wma_afh.c
1 /*
2  * Copyright (C) 2009 Andre Noll <maan@tuebingen.mpg.de>
3  *
4  * Licensed under the GPL v2. For licencing details see COPYING.
5  */
6
7 /** \file wma_afh.c The audio format handler for WMA files. */
8
9 #include <sys/types.h>
10 #include <regex.h>
11 #include <iconv.h>
12
13 #include "para.h"
14 #include "error.h"
15 #include "afh.h"
16 #include "portable_io.h"
17 #include "string.h"
18 #include "wma.h"
19 #include "fd.h"
20
/*
 * Iterate over the packets of a buffer.
 *
 * _frame is set to the start of each _step-sized packet of the buffer of
 * _len bytes which starts at _start. The loop ends as soon as the end of the
 * current packet is no longer strictly below the end of the buffer.
 */
#define FOR_EACH_FRAME(_frame, _start, _len, _step) \
	for (_frame = (_start); \
		_frame + (_step) < (_start) + (_len); \
		_frame += (_step))
24
25 /*
26  * Must be called on a frame boundary, e.g. start + header_len.
27  * \return Frame count, superframe count via *num_superframes.
28  */
29 static int count_frames(const char *buf, int buf_size, uint32_t packet_size,
30         int *num_superframes)
31 {
32         int fc = 0, sfc = 0; /* frame count, superframe count */
33         const uint8_t *p;
34
35
36         FOR_EACH_FRAME(p, (uint8_t *)buf, buf_size, packet_size) {
37                 fc += p[WMA_FRAME_SKIP] & 0x0f;
38                 sfc++;
39         }
40         PARA_INFO_LOG("%d frames, %d superframes\n", fc, sfc);
41         *num_superframes = sfc;
42         return fc;
43 }
44
45 /*
46  * put_utf8() and get_str16() below are based on macros in libavutil/common.h
47  * of the mplayer source code, copyright (c) 2006 Michael Niedermayer
48  * <michaelni@gmx.at>.
49  */
50
/*
 * Convert a 32-bit Unicode character to its UTF-8 encoded form.
 *
 * Writes up to 4 bytes for values in the valid UTF-8 range and up to 7 bytes
 * in the general case, depending on the length of the converted Unicode
 * character.
 *
 * \param val The character to convert.
 * \param result Where the converted UTF-8 bytes are written.
 *
 * \return The number of bytes written to result.
 */
static int put_utf8(uint32_t val, char *result)
{
	char *wp = result; /* write position */
	int num_bytes, shift;

	if (val < 0x80) { /* single byte, stored verbatim */
		*wp = val;
		return 1;
	}
	num_bytes = DIV_ROUND_UP(wma_log2(val), 5);
	shift = 6 * (num_bytes - 1);
	/* lead byte: num_bytes high bits set, followed by the top bits of val */
	*wp++ = (256 - (256 >> num_bytes)) | (val >> shift);
	/* continuation bytes, six bits of val each */
	for (shift -= 6; shift >= 0; shift -= 6)
		*wp++ = 0x80 | ((val >> shift) & 0x3f);
	return wp - result;
}
79
/*
 * Convert a UTF-16 string, read via read_u16(), to a UTF-8 C string.
 *
 * At most len / 2 16-bit code units are read from in. Conversion stops early
 * at the first zero code unit. A high surrogate (0xd800-0xdbff) which is
 * directly followed by a low surrogate (0xdc00-0xdfff) is combined into a
 * single code point before it is encoded, so that characters outside the
 * basic multilingual plane yield one 4-byte UTF-8 sequence rather than two
 * invalid 3-byte sequences. Unpaired surrogates are passed to put_utf8()
 * unchanged.
 *
 * \param in The UTF-16 input.
 * \param len Number of input bytes available.
 *
 * \return A NUL-terminated buffer obtained from para_realloc() which the
 * caller must free, or NULL if len is smaller than two.
 */
static char *get_str16(const char *in, int len)
{
	const char *p = in;
	int out_size = 0, out_len = 0;
	int units = len / 2; /* 16-bit code units left to read */
	char *out = NULL;

	while (units > 0) {
		uint32_t x;

		/* Make sure the longest sequence plus the terminator fits. */
		if (out_len + 7 + 1 >= out_size) {
			out_size = 2 * out_size + 50;
			out = para_realloc(out, out_size);
		}
		x = read_u16(p);
		p += 2;
		units--;
		if (x >= 0xd800 && x <= 0xdbff && units > 0) {
			uint32_t y = read_u16(p);

			if (y >= 0xdc00 && y <= 0xdfff) { /* surrogate pair */
				x = 0x10000 + ((x - 0xd800) << 10) + (y - 0xdc00);
				p += 2;
				units--;
			}
		}
		out_len += put_utf8(x, out + out_len);
		if (x == 0) /* put_utf8() has written the terminator */
			return out;
	}
	if (out)
		out[out_len] = '\0';
	return out;
}
103
/*
 * GUID of the content description object,
 * 75B22633-668E-11CF-A6D9-00AA0062CE6C, in the byte order found in the file.
 */
static const char content_description_header[] = {
	0x33, 0x26, 0xb2, 0x75,
	0x8e, 0x66,
	0xcf, 0x11,
	0xa6, 0xd9, 0x00, 0xaa, 0x00, 0x62, 0xce, 0x6c
};

/*
 * GUID of the extended content description object,
 * D2D0A440-E307-11D2-97F0-00A0C95EA850, in the byte order found in the file.
 */
static const char extended_content_header[] = {
	0x40, 0xa4, 0xd0, 0xd2,
	0x07, 0xe3,
	0xd2, 0x11,
	0x97, 0xf0, 0x00, 0xa0, 0xc9, 0x5e, 0xa8, 0x50
};

/* "WM/Year" in UTF-16LE, without terminating null character */
static const char year_tag_header[] = {
	0x57, 0x00, 0x4d, 0x00, 0x2f, 0x00, /* W M / */
	0x59, 0x00, 0x65, 0x00, 0x61, 0x00, 0x72, 0x00 /* Y e a r */
};

/* "WM/AlbumTitle" in UTF-16LE, without terminating null character */
static const char album_tag_header[] = {
	0x57, 0x00, 0x4d, 0x00, 0x2f, 0x00, /* W M / */
	0x41, 0x00, 0x6c, 0x00, 0x62, 0x00, 0x75, 0x00, 0x6d, 0x00, /* A l b u m */
	0x54, 0x00, 0x69, 0x00, 0x74, 0x00, 0x6c, 0x00, 0x65, 0x00 /* T i t l e */
};
125
126 static void read_asf_tags(const char *buf, int buf_size, struct taginfo *ti)
127 {
128         const char *p, *end = buf + buf_size, *q;
129         uint16_t len1, len2, len3, len4;
130
131         p = search_pattern(content_description_header,
132                 sizeof(content_description_header), buf, buf_size);
133         if (!p || p + 34 >= end) {
134                 PARA_NOTICE_LOG("content description header not found\n");
135                 goto next;
136         }
137         p += 24;
138         len1 = read_u16(p);
139         p += 2;
140         len2 = read_u16(p);
141         p += 2;
142         len3 = read_u16(p);
143         p += 2;
144         len4 = read_u16(p);
145         p += 2;
146         /* ignore length of the rating information */
147         p += 2;
148         if (p + len1 >= end)
149                 goto next;
150         ti->title = get_str16(p, len1);
151         p += len1;
152         if (p + len2 >= end)
153                 goto next;
154         ti->artist = get_str16(p, len2);
155         p += len2 + len3;
156         if (p + len4 >= end)
157                 goto next;
158         ti->comment = get_str16(p, len4);
159 next:
160         p = search_pattern(extended_content_header, sizeof(extended_content_header),
161                 buf, buf_size);
162         if (!p) {
163                 PARA_NOTICE_LOG("extended content header not found\n");
164                 return;
165         }
166         q = search_pattern(year_tag_header, sizeof(year_tag_header),
167                 p, end - p);
168         if (q) {
169                 const char *r = q + sizeof(year_tag_header) + 6;
170                 if (r < end)
171                         ti->year = get_str16(r, end - r);
172         }
173         q = search_pattern(album_tag_header, sizeof(album_tag_header),
174                 p, end - p);
175         if (q) {
176                 const char *r = q + sizeof(album_tag_header) + 6;
177                 if (r < end)
178                         ti->album = get_str16(r, end - r);
179         }
180 }
181
/*
 * Compute the duration of one chunk.
 *
 * The duration in microseconds is frames_per_chunk * 2048 * 10^6 / frequency.
 * It is stored in result, split into seconds and microseconds.
 */
static void set_chunk_tv(int frames_per_chunk, int frequency,
		struct timeval *result)
{
	uint64_t usecs;

	usecs = (uint64_t)frames_per_chunk * 2048 * 1000 * 1000 / frequency;
	result->tv_usec = usecs % (1000 * 1000);
	result->tv_sec = usecs / 1000 / 1000;
	PARA_INFO_LOG("chunk time: %lums\n", tv2ms(result));
}
192
193 /* Must be called on a frame boundary. */
194 static int wma_make_chunk_table(char *buf, size_t buf_size, uint32_t packet_size,
195                 struct afh_info *afhi)
196 {
197         const uint8_t *f, *start = (uint8_t *)buf;
198         int j, frames_per_chunk;
199         size_t ct_size = 250;
200         int ret, count = 0, num_frames, num_superframes;
201
202         afhi->chunk_table = para_malloc(ct_size * sizeof(uint32_t));
203         afhi->chunk_table[0] = 0;
204         afhi->chunk_table[1] = afhi->header_len;
205
206         num_frames = count_frames(buf, buf_size, packet_size,
207                 &num_superframes);
208         ret = -E_NO_WMA;
209         if (num_frames == 0 || num_superframes == 0)
210                 goto fail;
211         afhi->seconds_total = num_frames * 2048 /* FIXME */
212                 / afhi->frequency;
213         frames_per_chunk = num_frames / num_superframes / 2;
214         PARA_INFO_LOG("%d frames per chunk\n", frames_per_chunk);
215         j = 1;
216         FOR_EACH_FRAME(f, start, buf_size, packet_size) {
217                 count += f[WMA_FRAME_SKIP] & 0x0f;
218                 while (count > j * frames_per_chunk) {
219                         j++;
220                         if (j >= ct_size) {
221                                 ct_size *= 2;
222                                 afhi->chunk_table = para_realloc(
223                                         afhi->chunk_table,
224                                         ct_size * sizeof(uint32_t));
225                         }
226                         afhi->chunk_table[j] = f - start + afhi->header_len
227                                 + packet_size;
228                 }
229         }
230         afhi->chunks_total = j;
231         set_max_chunk_size(afhi);
232         set_chunk_tv(frames_per_chunk, afhi->frequency, &afhi->chunk_tv);
233         return 1;
234 fail:
235         free(afhi->chunk_table);
236         return ret;
237 }
238
239 static int wma_get_file_info(char *map, size_t numbytes, __a_unused int fd,
240         struct afh_info *afhi)
241 {
242         int ret;
243         struct asf_header_info ahi;
244
245         ret = read_asf_header(map, numbytes, &ahi);
246         if (ret < 0)
247                 return ret;
248         if (ret == 0)
249                 return -E_NO_WMA;
250         afhi->bitrate = ahi.bit_rate / 1000;
251         if (ahi.sample_rate == 0)
252                 return -E_NO_WMA;
253         afhi->frequency = ahi.sample_rate;
254         afhi->channels = ahi.channels;
255         afhi->header_len = ahi.header_len;
256
257         afhi->techinfo = make_message("%s%s%s%s%s",
258                 ahi.use_exp_vlc? "exp vlc" : "",
259                 (ahi.use_bit_reservoir && ahi.use_exp_vlc)? ", " : "",
260                 ahi.use_bit_reservoir? "bit reservoir" : "",
261                 (ahi.use_variable_block_len &&
262                         (ahi.use_exp_vlc || ahi.use_bit_reservoir)? ", " : ""),
263                 ahi.use_variable_block_len? "vbl" : ""
264         );
265         wma_make_chunk_table(map + ahi.header_len, numbytes - ahi.header_len,
266                 ahi.packet_size, afhi);
267         read_asf_tags(map, ahi.header_len, &afhi->tags);
268         return 0;
269 }
270
/* One object of the asf header. */
struct asf_object {
	/* Points to the first byte (the GUID) of the object. */
	char *ptr;
	/* Total size of the object in bytes, including GUID and size field. */
	uint64_t size;
};

/*
 * Positions of the two tag objects in the object array of the top-level
 * header object. Negative means the object does not exist.
 */
struct tag_object_nums {
	/* Index of the content description object. */
	int content_descr_obj_num;
	/* Index of the extended content description object. */
	int extended_content_descr_obj_num;
};

/* The fixed size fields of the top-level header object and its objects. */
struct afs_top_level_header_object {
	/* Size field of the header object. */
	uint64_t size;
	/* Number of objects contained in the header object. */
	uint32_t num_objects;
	/* Copied unmodified to the output file. */
	uint8_t reserved1;
	uint8_t reserved2;
	/* Array of num_objects elements. */
	struct asf_object *objects;
};
287
288 #define CHECK_HEADER(_p, _h) (memcmp((_p), (_h), sizeof((_h))) == 0)
289
290 static int read_asf_objects(const char *src, size_t size, uint32_t num_objects,
291                 struct asf_object *objs, struct tag_object_nums *ton)
292 {
293         int i;
294         const char *p;
295
296         for (i = 0, p = src; i < num_objects; p += objs[i++].size) {
297                 if (p + 24 > src + size)
298                         return -E_NO_WMA;
299                 objs[i].ptr = (char *)p;
300                 objs[i].size = read_u64(p + 16);
301                 if (p + objs[i].size > src + size)
302                         return -E_NO_WMA;
303
304                 if (CHECK_HEADER(p, content_description_header))
305                         ton->content_descr_obj_num = i;
306                 else if (CHECK_HEADER(p, extended_content_header))
307                         ton->extended_content_descr_obj_num = i;
308         }
309         return 1;
310 }
311
/*
 * GUID of the top-level header object,
 * 75B22630-668E-11CF-A6D9-00AA0062CE6C, in the byte order found in the file.
 */
static const char top_level_header_object_guid[] = {
	0x30, 0x26, 0xb2, 0x75,
	0x8e, 0x66,
	0xcf, 0x11,
	0xa6, 0xd9, 0x00, 0xaa, 0x00, 0x62, 0xce, 0x6c
};
316
317 static int convert_utf8_to_utf16(char *src, char **dst)
318 {
319         iconv_t cd;
320         size_t sz, inbytes, outbytes, inbytesleft, outbytesleft;
321         char *inbuf, *outbuf;
322         int ret;
323
324         if (!src || !*src) {
325                 *dst = para_calloc(2);
326                 return 0;
327         }
328         /*
329          * Without specifying LE (little endian), iconv includes a byte order
330          * mark (e.g. 0xFFFE) at the beginning.
331          */
332         cd = iconv_open("UTF-16LE", "UTF-8");
333         if (cd == (iconv_t)-1) {
334                 *dst = NULL;
335                 return -ERRNO_TO_PARA_ERROR(errno);
336         }
337         inbuf = src;
338         /* even though src is in UTF-8, strlen() should DTRT */
339         inbytes = inbytesleft = strlen(src);
340         outbytes = outbytesleft = 4 * inbytes + 2; /* hope that's enough */
341         *dst = outbuf = para_malloc(outbytes);
342         sz = iconv(cd, ICONV_CAST &inbuf, &inbytesleft, &outbuf, &outbytesleft);
343         if (sz == (size_t)-1) {
344                 ret = -ERRNO_TO_PARA_ERROR(errno);
345                 free(*dst);
346                 *dst = NULL;
347                 goto out;
348         }
349         assert(outbytes >= outbytesleft);
350         assert(outbytes - outbytesleft < INT_MAX - 2);
351         ret = outbytes - outbytesleft;
352         outbuf = para_realloc(*dst, ret + 2);
353         outbuf[ret] = outbuf[ret + 1] = '\0';
354         ret += 2;
355         *dst = outbuf;
356         PARA_INFO_LOG("converted %s to %d UTF-16 bytes\n", src, ret);
357 out:
358         if (iconv_close(cd) < 0)
359                 PARA_WARNING_LOG("iconv_close: %s\n", strerror(errno));
360         return ret;
361 }
362
363 /* The content description object contains artist, title, comment. */
364 static int make_cdo(struct taginfo *tags, const struct asf_object *cdo,
365                 struct asf_object *result)
366 {
367         const char *cr, *rating; /* orig data */
368         uint16_t orig_cr_bytes, orig_rating_bytes;
369         /* pointers to new UTF-16 tags */
370         char *artist = NULL, *title = NULL, *comment = NULL;
371         /* number of bytes in UTF-16 for the new tags */
372         int artist_bytes, title_bytes, comment_bytes, ret;
373         char *p, null[2] = "\0\0";
374
375         result->ptr = NULL;
376         result->size = 0;
377         ret = convert_utf8_to_utf16(tags->artist, &artist);
378         if (ret < 0)
379                 return ret;
380         assert(artist);
381         artist_bytes = ret;
382         ret = convert_utf8_to_utf16(tags->title, &title);
383         if (ret < 0)
384                 goto out;
385         assert(title);
386         title_bytes = ret;
387         ret = convert_utf8_to_utf16(tags->comment, &comment);
388         if (ret < 0)
389                 goto out;
390         assert(comment);
391         comment_bytes = ret;
392
393         if (cdo) {
394                 uint16_t orig_title_bytes, orig_artist_bytes, orig_comment_bytes;
395                 /*
396                  * Sizes of the five fields (stored as 16-bit numbers) are
397                  * located after the header (16 bytes) and the cdo size (8
398                  * bytes).
399                  */
400                 orig_title_bytes = read_u16(cdo->ptr + 24);
401                 orig_artist_bytes = read_u16(cdo->ptr + 26);
402                 orig_cr_bytes = read_u16(cdo->ptr + 28);
403                 orig_comment_bytes = read_u16(cdo->ptr + 30);
404                 orig_rating_bytes = read_u16(cdo->ptr + 32);
405                 cr = cdo->ptr + 34 + orig_title_bytes + orig_artist_bytes;
406                 rating = cr + orig_cr_bytes + orig_comment_bytes;
407         } else {
408                 orig_cr_bytes = 2;
409                 orig_rating_bytes = 2;
410                 cr = null;
411                 rating = null;
412         }
413
414         /* compute size of result cdo */
415         result->size = 16 + 8 + 5 * 2 + title_bytes + artist_bytes
416                 + orig_cr_bytes + comment_bytes + orig_rating_bytes;
417         PARA_DEBUG_LOG("cdo is %zu bytes\n", (size_t)result->size);
418         p = result->ptr = para_malloc(result->size);
419         memcpy(p, content_description_header, 16);
420         p += 16;
421         write_u64(p, result->size);
422         p += 8;
423         write_u16(p, title_bytes);
424         p += 2;
425         write_u16(p, artist_bytes);
426         p += 2;
427         write_u16(p, orig_cr_bytes);
428         p += 2;
429         write_u16(p, comment_bytes);
430         p += 2;
431         write_u16(p, orig_rating_bytes);
432         p += 2;
433         memcpy(p, title, title_bytes);
434         p += title_bytes;
435         memcpy(p, artist, artist_bytes);
436         p += artist_bytes;
437         memcpy(p, cr, orig_cr_bytes);
438         p += orig_cr_bytes;
439         memcpy(p, comment, comment_bytes);
440         p += comment_bytes;
441         memcpy(p, rating, orig_rating_bytes);
442         p += orig_rating_bytes;
443         assert(p - result->ptr == result->size);
444         ret = 1;
445 out:
446         free(artist);
447         free(title);
448         free(comment);
449         return ret;
450 }
451
452 /* The extended content description object contains album and year. */
453 static int make_ecdo(struct taginfo *tags, struct asf_object *result)
454 {
455         int ret;
456         char *p, *album = NULL, *year = NULL, null[2] = "\0\0";
457         int album_bytes, year_bytes;
458
459         result->ptr = NULL;
460         result->size = 0;
461         ret = convert_utf8_to_utf16(tags->album, &album);
462         if (ret < 0)
463                 return ret;
464         assert(album);
465         album_bytes = ret;
466         ret = convert_utf8_to_utf16(tags->year, &year);
467         if (ret < 0)
468                 goto out;
469         assert(year);
470         year_bytes = ret;
471         result->size = 16 + 8 + 2; /* GUID, size, count */
472         /* name_length + name + null + data type + val length + val */
473         result->size += 2 + sizeof(album_tag_header) + 2 + 2 + 2 + album_bytes;
474         result->size += 2 + sizeof(year_tag_header) + 2 + 2 + 2 + year_bytes;
475
476         p = result->ptr = para_malloc(result->size);
477         memcpy(p, extended_content_header, 16);
478         p += 16;
479         write_u64(p, result->size);
480         p += 8;
481         write_u16(p, 2); /* count */
482         p += 2;
483
484         /* album */
485         write_u16(p, sizeof(album_tag_header) + 2);
486         p += 2;
487         memcpy(p, album_tag_header, sizeof(album_tag_header));
488         p += sizeof(album_tag_header);
489         memcpy(p, null, 2);
490         p += 2;
491         write_u16(p, 0); /* data type (UTF-16) */
492         p += 2;
493         write_u16(p, album_bytes);
494         p += 2;
495         memcpy(p, album, album_bytes);
496         p += album_bytes;
497
498         /* year */
499         write_u16(p, sizeof(year_tag_header));
500         p += 2;
501         memcpy(p, year_tag_header, sizeof(year_tag_header));
502         p += sizeof(year_tag_header);
503         memcpy(p, null, 2);
504         p += 2;
505         write_u16(p, 0); /* data type (UTF-16) */
506         p += 2;
507         write_u16(p, year_bytes);
508         p += 2;
509         memcpy(p, year, year_bytes);
510         p += year_bytes;
511         assert(p - result->ptr == result->size);
512         ret = 1;
513 out:
514         free(album);
515         free(year);
516         return ret;
517 }
518
519 static int write_output_file(int fd, const char *map, size_t mapsize,
520                 struct afs_top_level_header_object *top, struct tag_object_nums *ton,
521                 struct asf_object *cdo, struct asf_object *ecdo)
522 {
523         int i, ret;
524         uint64_t sz; /* of the new header object */
525         uint32_t num_objects;
526         char tmp[8];
527
528         sz = 16 + 8 + 4 + 1 + 1; /* top-level header object */
529         for (i = 0; i < top->num_objects; i++) {
530                 if (i == ton->content_descr_obj_num)
531                         continue;
532                 if (i == ton->extended_content_descr_obj_num)
533                         continue;
534                 sz += top->objects[i].size;
535         }
536         sz += cdo->size;
537         sz += ecdo->size;
538         num_objects = top->num_objects;
539         if (ton->content_descr_obj_num < 0)
540                 num_objects++;
541         if (ton->extended_content_descr_obj_num < 0)
542                 num_objects++;
543         ret = xwrite(fd, top_level_header_object_guid, 16);
544         if (ret < 0)
545                 goto out;
546         write_u64(tmp, sz);
547         ret = xwrite(fd, tmp, 8);
548         if (ret < 0)
549                 goto out;
550         write_u32(tmp, num_objects);
551         ret = xwrite(fd, tmp, 4);
552         if (ret < 0)
553                 goto out;
554         write_u8(tmp, top->reserved1);
555         ret = xwrite(fd, tmp, 1);
556         if (ret < 0)
557                 goto out;
558         write_u8(tmp, top->reserved2);
559         ret = xwrite(fd, tmp, 1);
560         if (ret < 0)
561                 goto out;
562         /*
563          * Write cto and ecto as objects 0 and 1 if they did not exist in the
564          * original file.
565          */
566         if (ton->content_descr_obj_num < 0) {
567                 ret = xwrite(fd, cdo->ptr, cdo->size);
568                 if (ret < 0)
569                         goto out;
570         }
571         if (ton->extended_content_descr_obj_num < 0) {
572                 ret = xwrite(fd, ecdo->ptr, ecdo->size);
573                 if (ret < 0)
574                         goto out;
575         }
576
577         for (i = 0; i < top->num_objects; i++) {
578                 char *buf = top->objects[i].ptr;
579                 sz = top->objects[i].size;
580                 if (i == ton->content_descr_obj_num) {
581                         buf = cdo->ptr;
582                         sz = cdo->size;
583                 } else if (i == ton->extended_content_descr_obj_num) {
584                         buf = ecdo->ptr;
585                         sz = ecdo->size;
586                 }
587                 ret = xwrite(fd, buf, sz);
588                 if (ret < 0)
589                         goto out;
590         }
591         ret = xwrite(fd, map + top->size, mapsize - top->size);
592 out:
593         return ret;
594 }
595
596 static int wma_rewrite_tags(const char *map, size_t mapsize,
597                 struct taginfo *tags, int fd,
598                 __a_unused const char *filename)
599 {
600         struct afs_top_level_header_object top;
601         struct tag_object_nums ton = {-1, -1};
602         const char *p = map;
603         /* (extended) content description object */
604         struct asf_object cdo = {.ptr = NULL}, ecdo = {.ptr = NULL};
605         int ret;
606
607         /* guid + size + num_objects + 2 * reserved */
608         if (mapsize < 16 + 8 + 4 + 1 + 1)
609                 return -E_NO_WMA;
610         if (memcmp(map, top_level_header_object_guid, 16))
611                 return -E_NO_WMA;
612         p += 16;
613         top.size = read_u64(p);
614         PARA_INFO_LOG("header_size: %lu\n", (long unsigned)top.size);
615         if (top.size >= mapsize)
616                 return -E_NO_WMA;
617         p += 8;
618         top.num_objects = read_u32(p);
619         PARA_NOTICE_LOG("%u header objects\n", top.num_objects);
620         if (top.num_objects > top.size / 24)
621                 return -E_NO_WMA;
622         p += 4;
623         top.reserved1 = read_u8(p);
624         p++;
625         top.reserved2 = read_u8(p);
626         if (top.reserved2 != 2)
627                 return -E_NO_WMA;
628         p++; /* objects start at p */
629         top.objects = para_malloc(top.num_objects * sizeof(struct asf_object));
630         ret = read_asf_objects(p, top.size - (p - map), top.num_objects,
631                 top.objects, &ton);
632         if (ret < 0)
633                 goto out;
634         ret = make_cdo(tags, ton.content_descr_obj_num >= 0?
635                 top.objects + ton.content_descr_obj_num : NULL, &cdo);
636         if (ret < 0)
637                 goto out;
638         ret = make_ecdo(tags, &ecdo);
639         if (ret < 0)
640                 goto out;
641         ret = write_output_file(fd, map, mapsize, &top, &ton, &cdo,
642                 &ecdo);
643 out:
644         free(cdo.ptr);
645         free(ecdo.ptr);
646         free(top.objects);
647         return ret;
648 }
649
650 static const char * const wma_suffixes[] = {"wma", NULL};
651
652 /**
653  * The init function of the wma audio format handler.
654  *
655  * \param afh Pointer to the struct to initialize.
656  */
657 void wma_afh_init(struct audio_format_handler *afh)
658 {
659         afh->get_file_info = wma_get_file_info;
660         afh->suffixes = wma_suffixes;
661         afh->rewrite_tags = wma_rewrite_tags;
662 }