wma_afh: Fix two bugs in convert_utf8_to_utf16().
[paraslash.git] / wma_afh.c
1 /*
2  * Copyright (C) 2009 Andre Noll <maan@tuebingen.mpg.de>
3  *
4  * Licensed under the GPL v2. For licencing details see COPYING.
5  */
6
7 /** \file wma_afh.c The audio format handler for WMA files. */
8
9 #include <sys/types.h>
10 #include <regex.h>
11 #include <iconv.h>
12
13 #include "para.h"
14 #include "error.h"
15 #include "afh.h"
16 #include "portable_io.h"
17 #include "string.h"
18 #include "wma.h"
19 #include "fd.h"
20
/*
 * Iterate over the fixed-size packets of a buffer.
 *
 * _f is set to _buf and advanced by _ps bytes per iteration. The loop body is
 * executed as long as _f + _ps is strictly smaller than _buf + _size, so a
 * packet that ends exactly at the end of the buffer is not visited. Note that
 * _buf and _ps are evaluated more than once.
 */
#define FOR_EACH_FRAME(_f, _buf, _size, _ps) \
	for ((_f) = (_buf); (_f) + (_ps) < (_buf) + (_size); (_f) += (_ps))
24
25 /*
26  * Must be called on a frame boundary, e.g. start + header_len.
27  * \return Frame count, superframe count via *num_superframes.
28  */
29 static int count_frames(const char *buf, int buf_size, uint32_t packet_size,
30         int *num_superframes)
31 {
32         int fc = 0, sfc = 0; /* frame count, superframe count */
33         const uint8_t *p;
34
35
36         FOR_EACH_FRAME(p, (uint8_t *)buf, buf_size, packet_size) {
37                 fc += p[WMA_FRAME_SKIP] & 0x0f;
38                 sfc++;
39         }
40         PARA_INFO_LOG("%d frames, %d superframes\n", fc, sfc);
41         if (num_superframes)
42                 *num_superframes = sfc;
43         return fc;
44 }
45
46 /*
47  * put_utf8() and get_str16() below are based on macros in libavutil/common.h
48  * of the mplayer source code, copyright (c) 2006 Michael Niedermayer
49  * <michaelni@gmx.at>.
50  */
51
/*
 * Convert a 32-bit Unicode character to its UTF-8 encoded form.
 *
 * Writes up to 4 bytes for values in the valid UTF-8 range and up to 7 bytes
 * in the general case, depending on the length of the converted Unicode
 * character. No terminating zero byte is appended, but a zero input value is
 * stored as a single zero byte.
 *
 * \param val The character to convert.
 * \param result Where the converted UTF-8 bytes are written.
 *
 * \return The number of bytes written to \a result.
 */
static int put_utf8(uint32_t val, char *result)
{
	char *wp = result;
	int num_bytes, bits_left;

	if (val < 0x80) { /* seven bit values are stored unmodified */
		*wp = val;
		return 1;
	}
	/* presumably wma_log2() is the integer binary logarithm - TODO confirm */
	num_bytes = (wma_log2(val) + 4) / 5;
	bits_left = 6 * (num_bytes - 1);
	/* lead byte: num_bytes one-bits, followed by the topmost payload bits */
	*wp++ = (256 - (256 >> num_bytes)) | (val >> bits_left);
	/* continuation bytes, each of which carries six payload bits */
	for (bits_left -= 6; bits_left >= 0; bits_left -= 6)
		*wp++ = 0x80 | ((val >> bits_left) & 0x3f);
	return wp - result;
}
80
/*
 * Convert a buffer of 16-bit characters to a zero-terminated UTF-8 string.
 *
 * The input is read in units of two bytes via read_u16() and each unit is
 * converted on its own with put_utf8(), i.e. surrogate pairs are not
 * combined. Conversion stops after the first zero unit or after len / 2
 * units, whichever comes first.
 *
 * \param in The buffer to convert.
 * \param len Size of the buffer in bytes.
 *
 * \return A buffer obtained from para_realloc() which contains the converted
 * string, or NULL if len / 2 is zero.
 */
static char *get_str16(const char *in, int len)
{
	char *utf8 = NULL;
	int alloc_size = 0, used = 0;
	const char *rp = in;
	int units_left;

	for (units_left = len / 2; units_left != 0; units_left--, rp += 2) {
		uint32_t c;

		/* make room for the longest encoding plus the terminator */
		if (used + 7 + 1 >= alloc_size) {
			alloc_size = 2 * alloc_size + 50;
			utf8 = para_realloc(utf8, alloc_size);
		}
		c = read_u16(rp);
		used += put_utf8(c, utf8 + used);
		if (c == 0) /* put_utf8() has stored the terminator already */
			return utf8;
	}
	if (utf8 != NULL)
		utf8[used] = '\0';
	return utf8;
}
104
/* The 16 bytes which start a content description object. */
static const char content_description_header[16] = {
	0x33, 0x26, 0xb2, 0x75, 0x8e, 0x66, 0xcf, 0x11,
	0xa6, 0xd9, 0x00, 0xaa, 0x00, 0x62, 0xce, 0x6c
};
109
/* The 16 bytes which start an extended content description object. */
static const char extended_content_header[16] = {
	0x40, 0xa4, 0xd0, 0xd2, 0x07, 0xe3, 0xd2, 0x11,
	0x97, 0xf0, 0x00, 0xa0, 0xc9, 0x5e, 0xa8, 0x50
};
114
/* "WM/Year" as 16-bit little endian characters, without terminator. */
static const char year_tag_header[] = {
	'W', 0, 'M', 0, '/', 0,
	'Y', 0, 'e', 0, 'a', 0, 'r', 0
};
119
/* "WM/AlbumTitle" as 16-bit little endian characters, without terminator. */
static const char album_tag_header[] = {
	'W', 0, 'M', 0, '/', 0,
	'A', 0, 'l', 0, 'b', 0, 'u', 0, 'm', 0,
	'T', 0, 'i', 0, 't', 0, 'l', 0, 'e', 0
};
126
127 static void read_asf_tags(const char *buf, int buf_size, struct taginfo *ti)
128 {
129         const char *p, *end = buf + buf_size, *q;
130         uint16_t len1, len2, len3, len4;
131
132         p = search_pattern(content_description_header,
133                 sizeof(content_description_header), buf, buf_size);
134         if (!p || p + 34 >= end) {
135                 PARA_NOTICE_LOG("content description header not found\n");
136                 goto next;
137         }
138         p += 24;
139         len1 = read_u16(p);
140         p += 2;
141         len2 = read_u16(p);
142         p += 2;
143         len3 = read_u16(p);
144         p += 2;
145         len4 = read_u16(p);
146         p += 2;
147         /* ignore length of the rating information */
148         p += 2;
149         if (p + len1 >= end)
150                 goto next;
151         ti->title = get_str16(p, len1);
152         p += len1;
153         if (p + len2 >= end)
154                 goto next;
155         ti->artist = get_str16(p, len2);
156         p += len2 + len3;
157         if (p + len4 >= end)
158                 goto next;
159         ti->comment = get_str16(p, len4);
160 next:
161         p = search_pattern(extended_content_header, sizeof(extended_content_header),
162                 buf, buf_size);
163         if (!p) {
164                 PARA_NOTICE_LOG("extended content header not found\n");
165                 return;
166         }
167         q = search_pattern(year_tag_header, sizeof(year_tag_header),
168                 p, end - p);
169         if (q) {
170                 const char *r = q + sizeof(year_tag_header) + 6;
171                 if (r < end)
172                         ti->year = get_str16(r, end - r);
173         }
174         q = search_pattern(album_tag_header, sizeof(album_tag_header),
175                 p, end - p);
176         if (q) {
177                 const char *r = q + sizeof(album_tag_header) + 6;
178                 if (r < end)
179                         ti->album = get_str16(r, end - r);
180         }
181 }
182
/*
 * Compute the duration of one chunk.
 *
 * The duration in microseconds is frames_per_chunk * 2048 * 1000000 /
 * frequency, computed with 64 bit arithmetic. The factor 2048 is hard-coded
 * (presumably the number of samples per frame - TODO confirm).
 *
 * \param frames_per_chunk Number of frames each chunk consists of.
 * \param frequency The sample rate, must not be zero.
 * \param result The chunk time is stored here.
 */
static void set_chunk_tv(int frames_per_chunk, int frequency,
		struct timeval *result)
{
	const uint64_t usec_per_sec = 1000 * 1000;
	uint64_t chunk_usec;

	chunk_usec = (uint64_t)frames_per_chunk * 2048 * usec_per_sec;
	chunk_usec /= frequency;
	result->tv_sec = chunk_usec / usec_per_sec;
	result->tv_usec = chunk_usec % usec_per_sec;
	PARA_INFO_LOG("chunk time: %lums\n", tv2ms(result));
}
193
194 /* Must be called on a frame boundary. */
195 static int wma_make_chunk_table(char *buf, size_t buf_size, uint32_t packet_size,
196                 struct afh_info *afhi)
197 {
198         const uint8_t *f, *start = (uint8_t *)buf;
199         int j, frames_per_chunk;
200         size_t ct_size = 250;
201         int ret, count = 0, num_frames, num_superframes;
202
203         afhi->chunk_table = para_malloc(ct_size * sizeof(uint32_t));
204         afhi->chunk_table[0] = 0;
205         afhi->chunk_table[1] = afhi->header_len;
206
207         num_frames = count_frames(buf, buf_size, packet_size,
208                 &num_superframes);
209         ret = -E_NO_WMA;
210         if (num_frames == 0 || num_superframes == 0)
211                 goto fail;
212         afhi->seconds_total = num_frames * 2048 /* FIXME */
213                 / afhi->frequency;
214         frames_per_chunk = num_frames / num_superframes / 2;
215         PARA_INFO_LOG("%d frames per chunk\n", frames_per_chunk);
216         j = 1;
217         FOR_EACH_FRAME(f, start, buf_size, packet_size) {
218                 count += f[WMA_FRAME_SKIP] & 0x0f;
219                 while (count > j * frames_per_chunk) {
220                         j++;
221                         if (j >= ct_size) {
222                                 ct_size *= 2;
223                                 afhi->chunk_table = para_realloc(
224                                         afhi->chunk_table,
225                                         ct_size * sizeof(uint32_t));
226                         }
227                         afhi->chunk_table[j] = f - start + afhi->header_len
228                                 + packet_size;
229                 }
230         }
231         afhi->chunks_total = j;
232         set_chunk_tv(frames_per_chunk, afhi->frequency, &afhi->chunk_tv);
233         return 1;
234 fail:
235         free(afhi->chunk_table);
236         return ret;
237 }
238
239 static int wma_get_file_info(char *map, size_t numbytes, __a_unused int fd,
240         struct afh_info *afhi)
241 {
242         int ret;
243         struct asf_header_info ahi;
244
245         ret = read_asf_header(map, numbytes, &ahi);
246         if (ret < 0)
247                 return ret;
248         if (ret == 0)
249                 return -E_NO_WMA;
250         afhi->bitrate = ahi.bit_rate / 1000;
251         if (ahi.sample_rate == 0)
252                 return -E_NO_WMA;
253         afhi->frequency = ahi.sample_rate;
254         afhi->channels = ahi.channels;
255         afhi->header_len = ahi.header_len;
256
257         afhi->techinfo = make_message("%s%s%s%s%s",
258                 ahi.use_exp_vlc? "exp vlc" : "",
259                 (ahi.use_bit_reservoir && ahi.use_exp_vlc)? ", " : "",
260                 ahi.use_bit_reservoir? "bit reservoir" : "",
261                 (ahi.use_variable_block_len &&
262                         (ahi.use_exp_vlc || ahi.use_bit_reservoir)? ", " : ""),
263                 ahi.use_variable_block_len? "vbl" : ""
264         );
265         wma_make_chunk_table(map + ahi.header_len, numbytes - ahi.header_len,
266                 ahi.packet_size, afhi);
267         read_asf_tags(map, ahi.header_len, &afhi->tags);
268         return 0;
269 }
270
/* Location and size of one object of an ASF file. */
struct asf_object {
	/* First byte of the object. */
	char *ptr;
	/* Size of the object in bytes. */
	uint64_t size;
};

/*
 * Indices of the two tag objects within the top-level header object. The
 * tag rewriting code initializes both to -1, meaning "no such object".
 */
struct tag_object_nums {
	/* Index of the content description object. */
	int content_descr_obj_num;
	/* Index of the extended content description object. */
	int extended_content_descr_obj_num;
};

/* The fields of the top-level header object of an ASF file. */
struct afs_top_level_header_object {
	/* Size of the header object in bytes. */
	uint64_t size;
	/* Number of objects contained in the header object. */
	uint32_t num_objects;
	/* The two bytes which follow the object count. */
	uint8_t reserved1;
	uint8_t reserved2;
	/* Array of num_objects elements. */
	struct asf_object *objects;
};
287
/*
 * Evaluates to one if the first sizeof(_h) bytes at _p are identical to the
 * contents of _h, zero otherwise. _h must be an array, not a pointer.
 */
#define CHECK_HEADER(_p, _h) (!memcmp((_p), (_h), sizeof(_h)))
289
290 static int read_asf_objects(const char *src, size_t size, uint32_t num_objects,
291                 struct asf_object *objs, struct tag_object_nums *ton)
292 {
293         int i;
294         const char *p;
295
296         for (i = 0, p = src; i < num_objects; p += objs[i++].size) {
297                 if (p + 24 > src + size)
298                         return -E_NO_WMA;
299                 objs[i].ptr = (char *)p;
300                 objs[i].size = read_u64(p + 16);
301                 if (p + objs[i].size > src + size)
302                         return -E_NO_WMA;
303
304                 if (CHECK_HEADER(p, content_description_header))
305                         ton->content_descr_obj_num = i;
306                 else if (CHECK_HEADER(p, extended_content_header))
307                         ton->extended_content_descr_obj_num = i;
308         }
309         return 1;
310 }
311
/* The 16 bytes every file accepted by the tag rewriting code starts with. */
static const char top_level_header_object_guid[16] = {
	0x30, 0x26, 0xb2, 0x75, 0x8e, 0x66, 0xcf, 0x11,
	0xa6, 0xd9, 0x00, 0xaa, 0x00, 0x62, 0xce, 0x6c
};
316
317 static int convert_utf8_to_utf16(char *src, char **dst)
318 {
319         iconv_t cd;
320         size_t sz, inbytes, outbytes, inbytesleft, outbytesleft;
321         char *inbuf, *outbuf;
322         int ret;
323
324         if (!src || !*src) {
325                 *dst = para_calloc(2);
326                 return 0;
327         }
328         /*
329          * Without specifying LE (little endian), iconv includes a byte order
330          * mark (e.g. 0xFFFE) at the beginning.
331          */
332         cd = iconv_open("UTF-16LE", "UTF-8");
333         if (cd == (iconv_t)-1) {
334                 *dst = NULL;
335                 return -ERRNO_TO_PARA_ERROR(errno);
336         }
337         inbuf = src;
338         /* even though src is in UTF-8, strlen() should DTRT */
339         inbytes = inbytesleft = strlen(src);
340         outbytes = outbytesleft = 4 * inbytes + 2; /* hope that's enough */
341         *dst = outbuf = para_malloc(outbytes);
342         sz = iconv(cd, ICONV_CAST &inbuf, &inbytesleft, &outbuf, &outbytesleft);
343         if (sz == (size_t)-1) {
344                 ret = -ERRNO_TO_PARA_ERROR(errno);
345                 free(*dst);
346                 *dst = NULL;
347                 goto out;
348         }
349         assert(outbytes >= outbytesleft);
350         assert(outbytes - outbytesleft < INT_MAX - 2);
351         ret = outbytes - outbytesleft;
352         outbuf = para_realloc(*dst, ret + 2);
353         outbuf[ret] = outbuf[ret + 1] = '\0';
354         ret += 2;
355         *dst = outbuf;
356         PARA_INFO_LOG("converted %s to %d UTF-16 bytes\n", src, ret);
357 out:
358         if (iconv_close(cd) < 0)
359                 PARA_WARNING_LOG("iconv_close: %s\n", strerror(errno));
360         return ret;
361 }
362
363 /* The content description object contains artist, title, comment. */
364 static int make_cdo(struct taginfo *tags, const struct asf_object *cdo,
365                 struct asf_object *result)
366 {
367         const char *cr, *rating; /* orig data */
368         uint16_t orig_title_bytes, orig_artist_bytes, orig_cr_bytes,
369                 orig_comment_bytes, orig_rating_bytes;
370         /* pointers to new UTF-16 tags */
371         char *artist = NULL, *title = NULL, *comment = NULL;
372         /* number of bytes in UTF-16 for the new tags */
373         int artist_bytes, title_bytes, comment_bytes, ret;
374         char *p, null[2] = "\0\0";
375
376         result->ptr = NULL;
377         result->size = 0;
378         ret = convert_utf8_to_utf16(tags->artist, &artist);
379         if (ret < 0)
380                 return ret;
381         artist_bytes = ret;
382         ret = convert_utf8_to_utf16(tags->title, &title);
383         if (ret < 0)
384                 goto out;
385         title_bytes = ret;
386         ret = convert_utf8_to_utf16(tags->comment, &comment);
387         if (ret < 0)
388                 goto out;
389         comment_bytes = ret;
390
391         if (cdo) {
392                 /*
393                  * Sizes of the five fields (stored as 16-bit numbers) are
394                  * located after the header (16 bytes) and the cdo size (8
395                  * bytes).
396                  */
397                 orig_title_bytes = read_u16(cdo->ptr + 24);
398                 orig_artist_bytes = read_u16(cdo->ptr + 26);
399                 orig_cr_bytes = read_u16(cdo->ptr + 28);
400                 orig_comment_bytes = read_u16(cdo->ptr + 30);
401                 orig_rating_bytes = read_u16(cdo->ptr + 32);
402                 cr = cdo->ptr + 34 + orig_title_bytes + orig_artist_bytes;
403                 rating = cr + orig_cr_bytes + orig_comment_bytes;
404         } else {
405                 orig_title_bytes = 2;
406                 orig_artist_bytes = 2;
407                 orig_cr_bytes = 2;
408                 orig_comment_bytes = 2;
409                 orig_rating_bytes = 2;
410                 cr = null;
411                 rating = null;
412         }
413
414         /* compute size of result cdo */
415         result->size = 16 + 8 + 5 * 2 + title_bytes + artist_bytes
416                 + orig_cr_bytes + comment_bytes + orig_rating_bytes;
417         PARA_DEBUG_LOG("cdo is %zu bytes\n", (size_t)result->size);
418         p = result->ptr = para_malloc(result->size);
419         memcpy(p, content_description_header, 16);
420         p += 16;
421         write_u64(p, result->size);
422         p += 8;
423         write_u16(p, title_bytes);
424         p += 2;
425         write_u16(p, artist_bytes);
426         p += 2;
427         write_u16(p, orig_cr_bytes);
428         p += 2;
429         write_u16(p, comment_bytes);
430         p += 2;
431         write_u16(p, orig_rating_bytes);
432         p += 2;
433         memcpy(p, title, title_bytes);
434         p += title_bytes;
435         memcpy(p, artist, artist_bytes);
436         p += artist_bytes;
437         memcpy(p, cr, orig_cr_bytes);
438         p += orig_cr_bytes;
439         memcpy(p, comment, comment_bytes);
440         p += comment_bytes;
441         memcpy(p, rating, orig_rating_bytes);
442         p += orig_rating_bytes;
443         assert(p - result->ptr == result->size);
444         ret = 1;
445 out:
446         free(artist);
447         free(title);
448         free(comment);
449         return ret;
450 }
451
452 /* The extended content description object contains album and year. */
453 static int make_ecdo(struct taginfo *tags, struct asf_object *result)
454 {
455         int ret;
456         char *p, *album = NULL, *year = NULL, null[2] = "\0\0";
457         int album_bytes, year_bytes;
458
459         result->ptr = NULL;
460         result->size = 0;
461         ret = convert_utf8_to_utf16(tags->album, &album);
462         if (ret < 0)
463                 return ret;
464         album_bytes = ret;
465         ret = convert_utf8_to_utf16(tags->year, &year);
466         if (ret < 0)
467                 goto out;
468         year_bytes = ret;
469         result->size = 16 + 8 + 2; /* GUID, size, count */
470         /* name_length + name + null + data type + val length + val */
471         result->size += 2 + sizeof(album_tag_header) + 2 + 2 + 2 + album_bytes;
472         result->size += 2 + sizeof(year_tag_header) + 2 + 2 + 2 + year_bytes;
473
474         p = result->ptr = para_malloc(result->size);
475         memcpy(p, extended_content_header, 16);
476         p += 16;
477         write_u64(p, result->size);
478         p += 8;
479         write_u16(p, 2); /* count */
480         p += 2;
481
482         /* album */
483         write_u16(p, sizeof(album_tag_header) + 2);
484         p += 2;
485         memcpy(p, album_tag_header, sizeof(album_tag_header));
486         p += sizeof(album_tag_header);
487         memcpy(p, null, 2);
488         p += 2;
489         write_u16(p, 0); /* data type (UTF-16) */
490         p += 2;
491         write_u16(p, album_bytes);
492         p += 2;
493         memcpy(p, album, album_bytes);
494         p += album_bytes;
495
496         /* year */
497         write_u16(p, sizeof(year_tag_header));
498         p += 2;
499         memcpy(p, year_tag_header, sizeof(year_tag_header));
500         p += sizeof(year_tag_header);
501         memcpy(p, null, 2);
502         p += 2;
503         write_u16(p, 0); /* data type (UTF-16) */
504         p += 2;
505         write_u16(p, year_bytes);
506         p += 2;
507         memcpy(p, year, year_bytes);
508         p += year_bytes;
509         assert(p - result->ptr == result->size);
510         ret = 1;
511 out:
512         free(album);
513         free(year);
514         return ret;
515 }
516
517 static int write_output_file(int fd, const char *map, size_t mapsize,
518                 struct afs_top_level_header_object *top, struct tag_object_nums *ton,
519                 struct asf_object *cdo, struct asf_object *ecdo)
520 {
521         int i, ret;
522         uint64_t sz; /* of the new header object */
523         uint32_t num_objects;
524         char tmp[8];
525
526         sz = 16 + 8 + 4 + 1 + 1; /* top-level header object */
527         for (i = 0; i < top->num_objects; i++) {
528                 if (i == ton->content_descr_obj_num)
529                         continue;
530                 if (i == ton->extended_content_descr_obj_num)
531                         continue;
532                 sz += top->objects[i].size;
533         }
534         sz += cdo->size;
535         sz += ecdo->size;
536         num_objects = top->num_objects;
537         if (ton->content_descr_obj_num < 0)
538                 num_objects++;
539         if (ton->extended_content_descr_obj_num < 0)
540                 num_objects++;
541         ret = xwrite(fd, top_level_header_object_guid, 16);
542         if (ret < 0)
543                 goto out;
544         write_u64(tmp, sz);
545         ret = xwrite(fd, tmp, 8);
546         if (ret < 0)
547                 goto out;
548         write_u32(tmp, num_objects);
549         ret = xwrite(fd, tmp, 4);
550         if (ret < 0)
551                 goto out;
552         write_u8(tmp, top->reserved1);
553         ret = xwrite(fd, tmp, 1);
554         if (ret < 0)
555                 goto out;
556         write_u8(tmp, top->reserved2);
557         ret = xwrite(fd, tmp, 1);
558         if (ret < 0)
559                 goto out;
560         /*
561          * Write cto and ecto as objects 0 and 1 if they did not exist in the
562          * original file.
563          */
564         if (ton->content_descr_obj_num < 0) {
565                 ret = xwrite(fd, cdo->ptr, cdo->size);
566                 if (ret < 0)
567                         goto out;
568         }
569         if (ton->extended_content_descr_obj_num < 0) {
570                 ret = xwrite(fd, ecdo->ptr, ecdo->size);
571                 if (ret < 0)
572                         goto out;
573         }
574
575         for (i = 0; i < top->num_objects; i++) {
576                 char *buf = top->objects[i].ptr;
577                 sz = top->objects[i].size;
578                 if (i == ton->content_descr_obj_num) {
579                         buf = cdo->ptr;
580                         sz = cdo->size;
581                 } else if (i == ton->extended_content_descr_obj_num) {
582                         buf = ecdo->ptr;
583                         sz = ecdo->size;
584                 }
585                 ret = xwrite(fd, buf, sz);
586                 if (ret < 0)
587                         goto out;
588         }
589         ret = xwrite(fd, map + top->size, mapsize - top->size);
590 out:
591         return ret;
592 }
593
594 static int wma_rewrite_tags(const char *map, size_t mapsize,
595                 struct taginfo *tags, int fd,
596                 __a_unused const char *filename)
597 {
598         struct afs_top_level_header_object top;
599         struct tag_object_nums ton = {-1, -1};
600         const char *p = map;
601         /* (extended) content description object */
602         struct asf_object cdo = {.ptr = NULL}, ecdo = {.ptr = NULL};
603         int ret;
604
605         /* guid + size + num_objects + 2 * reserved */
606         if (mapsize < 16 + 8 + 4 + 1 + 1)
607                 return -E_NO_WMA;
608         if (memcmp(map, top_level_header_object_guid, 16))
609                 return -E_NO_WMA;
610         p += 16;
611         top.size = read_u64(p);
612         PARA_INFO_LOG("header_size: %lu\n", (long unsigned)top.size);
613         if (top.size >= mapsize)
614                 return -E_NO_WMA;
615         p += 8;
616         top.num_objects = read_u32(p);
617         PARA_NOTICE_LOG("%u header objects\n", top.num_objects);
618         if (top.num_objects > top.size / 24)
619                 return -E_NO_WMA;
620         p += 4;
621         top.reserved1 = read_u8(p);
622         p++;
623         top.reserved2 = read_u8(p);
624         if (top.reserved2 != 2)
625                 return -E_NO_WMA;
626         p++; /* objects start at p */
627         top.objects = para_malloc(top.num_objects * sizeof(struct asf_object));
628         ret = read_asf_objects(p, top.size - (p - map), top.num_objects,
629                 top.objects, &ton);
630         if (ret < 0)
631                 goto out;
632         ret = make_cdo(tags, ton.content_descr_obj_num >= 0?
633                 top.objects + ton.content_descr_obj_num : NULL, &cdo);
634         if (ret < 0)
635                 goto out;
636         ret = make_ecdo(tags, &ecdo);
637         if (ret < 0)
638                 goto out;
639         ret = write_output_file(fd, map, mapsize, &top, &ton, &cdo,
640                 &ecdo);
641 out:
642         free(cdo.ptr);
643         free(ecdo.ptr);
644         free(top.objects);
645         return ret;
646 }
647
648 static const char * const wma_suffixes[] = {"wma", NULL};
649
650 /**
651  * The init function of the wma audio format handler.
652  *
653  * \param afh Pointer to the struct to initialize.
654  */
655 void wma_afh_init(struct audio_format_handler *afh)
656 {
657         afh->get_file_info = wma_get_file_info;
658         afh->suffixes = wma_suffixes;
659         afh->rewrite_tags = wma_rewrite_tags;
660 }