2 * Copyright (C) 2003-2005 M. Bakker, Nero AG, http://www.nero.com
3 * FAAD2 - Freeware Advanced Audio (AAC) Decoder including SBR decoding
12 #include "portable_io.h"
17 * The three states of the mp4 parser. The parser only loads the audio specific
18 * values and tables when it is in the second state.
/*
 * The enumerators themselves are not visible in this excerpt. The rest of the
 * file refers to the states as ATS_INITIAL, ATS_SEEN_MP4A and
 * ATS_TRACK_CHANGE.
 */
20 enum audio_track_state {
21 /** We haven't encountered an mp4a atom so far. */
/* read_stsd() switches to ATS_SEEN_MP4A when it finds an mp4a entry. */
23 /** We have seen an mp4a atom but no subsequent trak atom yet. */
/* parse_sub_atoms() switches to ATS_TRACK_CHANGE on a trak atom in that state. */
25 /** A trak atom was seen *after* the mp4a atom. */
30 /* determines which atoms we still need to parse. */
31 enum audio_track_state state;
/* Read as a 16 bit value from the mp4a sample entry, see read_stsd(). */
34 uint16_t channel_count;
/*
 * Both values come from the stsz atom, see read_stsz(). A non-zero sample
 * size is reported for every sample by mp4_get_sample_size(), otherwise the
 * per-sample table is consulted.
 */
38 uint32_t stsz_sample_size;
39 uint32_t stsz_sample_count;
/* Number of stts entries and the sample count of each entry, see read_stts(). */
43 uint32_t stts_entry_count;
44 uint32_t *stts_sample_count;
/* Two arrays which are both allocated with stsc_entry_count elements, see read_stsc(). */
47 uint32_t stsc_entry_count;
48 uint32_t *stsc_first_chunk;
49 uint32_t *stsc_samples_per_chunk;
/* Number of chunk offsets and the offsets themselves, see read_stco(). */
52 uint32_t stco_entry_count;
53 uint32_t *stco_chunk_offset;
/*
 * Caller-supplied callbacks. This file calls the read, seek, write and
 * truncate members and passes user_data as the first argument to each.
 */
61 const struct mp4_callback *cb;
/* Parser state and sample tables of the audio track. */
73 struct mp4_track track;
/* Tags appended by parse_tag() and released by mp4_close(). */
74 struct mp4_metadata meta;
78 * Returns -E_MP4_READ, 0, or 1 on errors/EOF/success. Partial reads followed
79 * by EOF or read errors are treated as errors.
/*
 * Read size bytes into data through the read callback of f.
 *
 * NOTE(review): several lines of this function are not visible in this
 * excerpt, so the loop around the callback and the success path cannot be
 * confirmed from here.
 */
81 static int read_data(struct mp4 *f, void *data, size_t size)
/* The callback gets the opaque user_data pointer plus buffer and length. */
84 ssize_t ret = f->cb->read(f->cb->user_data, data, size);
/* Failures caused by EINTR are singled out from other read errors. */
85 if (ret < 0 && errno == EINTR)
87 /* regard EAGAIN as an error as reads should be blocking. */
/* A negative ret is mapped to -E_MP4_READ, any other value to zero. */
89 return ret < 0? -E_MP4_READ : 0;
/*
 * Fetch eight bytes with read_data() and store the value decoded by
 * read_u64_be() in result. The declaration of the buffer, the check of ret
 * and the return statement are not visible in this excerpt.
 */
95 static int read_int64(struct mp4 *f, uint64_t *result)
98 int ret = read_data(f, data, 8);
101 *result = read_u64_be(data);
/*
 * Fetch four bytes with read_data() and store the value decoded by
 * read_u32_be() in result. The declaration of the buffer, the check of ret
 * and the return statement are not visible in this excerpt.
 */
105 static int read_int32(struct mp4 *f, uint32_t *result)
108 int ret = read_data(f, data, 4);
111 *result = read_u32_be(data);
/*
 * Fetch two bytes with read_data() and store the value decoded by
 * read_u16_be() in result. The declaration of the buffer, the check of ret
 * and the return statement are not visible in this excerpt.
 */
115 static int read_int16(struct mp4 *f, uint16_t *result)
118 int ret = read_data(f, data, 2);
121 *result = read_u16_be(data);
126 ATOM_ITEM(MOOV, 'm', 'o', 'o', 'v') /* movie box */ \
127 ATOM_ITEM(TRAK, 't', 'r', 'a', 'k') /* track box */ \
128 ATOM_ITEM(MDIA, 'm', 'd', 'i', 'a') /* media box */ \
129 ATOM_ITEM(MINF, 'm', 'i', 'n', 'f') /* media information box */ \
130 ATOM_ITEM(STBL, 's', 't', 'b', 'l') /* sample table box */ \
131 ATOM_ITEM(UDTA, 'u', 'd', 't', 'a') /* user data box */ \
132 ATOM_ITEM(ILST, 'i', 'l', 's', 't') /* iTunes Metadata list */ \
133 ATOM_ITEM(ARTIST, 0xa9, 'A', 'R', 'T') /* tag atoms, see get_metadata_name() */ \
134 ATOM_ITEM(TITLE, 0xa9, 'n', 'a', 'm') \
135 ATOM_ITEM(ALBUM, 0xa9, 'a', 'l', 'b') \
136 ATOM_ITEM(DATE, 0xa9, 'd', 'a', 'y') \
137 ATOM_ITEM(COMMENT, 0xa9, 'c', 'm', 't') \
138 ATOM_ITEM(MDHD, 'm', 'd', 'h', 'd') /* media header box */ \
139 ATOM_ITEM(STSD, 's', 't', 's', 'd') /* sample description box */ \
140 ATOM_ITEM(STTS, 's', 't', 't', 's') /* time to sample box */ \
141 ATOM_ITEM(STSZ, 's', 't', 's', 'z') /* sample size box */ \
142 ATOM_ITEM(STCO, 's', 't', 'c', 'o') /* chunk offset box */ \
143 ATOM_ITEM(STSC, 's', 't', 's', 'c') /* sample to chunk box */ \
144 ATOM_ITEM(MP4A, 'm', 'p', '4', 'a') /* audio sample entry, see read_stsd() */ \
145 ATOM_ITEM(META, 'm', 'e', 't', 'a') /* iTunes Metadata box */ \
146 ATOM_ITEM(DATA, 'd', 'a', 't', 'a') /* iTunes Metadata data box */ \
/* Expanded this way, each table row yields the enumerator ATOM_<name>, in table order. */
148 #define ATOM_ITEM(_name, a, b, c, d) ATOM_ ## _name,
149 enum atom {ATOM_ITEMS};
/*
 * Look up the four byte atom name at p in a table built from ATOM_ITEMS.
 *
 * The name is decoded with read_u32_be() and compared against the 32 bit value
 * of each table entry. The return statements are not visible in this excerpt.
 */
152 static uint8_t atom_name_to_type(uint8_t *p)
/*
 * NOTE(review): the arguments are plain int expressions, so for the 0xa9
 * names the term (a << 24) exceeds INT_MAX, which is undefined for a signed
 * left shift. Casting a to uint32_t before shifting would avoid this.
 */
154 #define ATOM_VALUE(a, b, c, d) ((a << 24) + (b << 16) + (c << 8) + d)
/* Second expansion of the item list: one initializer with name string and value per atom. */
155 #define ATOM_ITEM(_name, a, b, c, d) \
156 {.name = # _name, .val = ATOM_VALUE(a, b, c, d)},
157 static const struct {
160 } atom_table[] = {ATOM_ITEMS};
162 uint32_t val = read_u32_be(p);
/* Linear scan; the index type is uint8_t, matching the return type. */
164 for (uint8_t n = 0; n < ARRAY_SIZE(atom_table); n++)
165 if (val == atom_table[n].val)
170 /* read atom header, atom size is returned with header included. */
/*
 * atom_type receives the result of atom_name_to_type() for bytes 4..7 of the
 * header. read_stsd() passes NULL for header_size, so that argument is
 * apparently optional (the NULL check is not visible in this excerpt).
 */
171 static int atom_read_header(struct mp4 *f, uint8_t *atom_type,
172 uint8_t *header_size, uint64_t *atom_size)
176 uint8_t atom_header[8];
/* The basic header is eight bytes: a 32 bit size followed by the four byte name. */
178 ret = read_data(f, atom_header, 8);
181 size = read_u32_be(atom_header);
/* In this case the size is fetched as a separate 64 bit value after the basic header. */
182 if (size == 1) { /* 64 bit atom size */
185 ret = read_int64(f, atom_size);
193 *atom_type = atom_name_to_type(atom_header + 4);
/* Return what the seek callback reports for a zero offset relative to SEEK_CUR. */
197 static off_t get_position(const struct mp4 *f)
199 return f->cb->seek(f->cb->user_data, 0, SEEK_CUR);
/*
 * Ask the seek callback to move to the given position, using SEEK_SET.
 *
 * NOTE(review): the return value of the callback is discarded, so a failed
 * seek goes unnoticed by the callers.
 */
202 static void set_position(struct mp4 *f, off_t position)
204 f->cb->seek(f->cb->user_data, position, SEEK_SET);
/*
 * Ask the seek callback to advance by num_skip bytes, using SEEK_CUR.
 *
 * NOTE(review): the return value of the callback is discarded, so a failed
 * seek goes unnoticed by the callers.
 */
207 static void skip_bytes(struct mp4 *f, off_t num_skip)
209 f->cb->seek(f->cb->user_data, num_skip, SEEK_CUR);
/*
 * Load sample size and sample count from the stsz atom and, if needed, the
 * table of per-sample sizes.
 *
 * The statements which follow the two conditions below and the return paths
 * are not visible in this excerpt.
 */
212 static int read_stsz(struct mp4 *f)
215 struct mp4_track *t = &f->track;
/* Applies to any state other than ATS_SEEN_MP4A, and to an already loaded table. */
217 if (t->state != ATS_SEEN_MP4A || t->stsz_table)
219 skip_bytes(f, 4); /* version (1), flags (3) */
220 ret = read_int32(f, &t->stsz_sample_size);
223 ret = read_int32(f, &t->stsz_sample_count);
/* A non-zero value is used as the size of every sample, see mp4_get_sample_size(). */
226 if (t->stsz_sample_size != 0)
/*
 * NOTE(review): the sample count is taken from the file without a visible
 * upper bound, so the allocation size is controlled by the input.
 */
228 t->stsz_table = para_malloc(t->stsz_sample_count * sizeof(int32_t));
229 for (uint32_t n = 0; n < t->stsz_sample_count; n++) {
230 ret = read_int32(f, &t->stsz_table[n]);
/*
 * Load the sample counts of the stts atom; the sample delta of each entry is
 * skipped.
 *
 * The statement which follows the condition below and the return paths are
 * not visible in this excerpt.
 */
237 static int read_stts(struct mp4 *f)
240 struct mp4_track *t = &f->track;
/* Applies to any state other than ATS_SEEN_MP4A, and to an already loaded table. */
242 if (t->state != ATS_SEEN_MP4A || t->stts_sample_count)
244 skip_bytes(f, 4); /* version (1), flags (3) */
245 ret = read_int32(f, &t->stts_entry_count);
/* NOTE(review): the entry count comes from the file; no upper bound is visible here. */
248 t->stts_sample_count = para_malloc(t->stts_entry_count
250 for (uint32_t n = 0; n < t->stts_entry_count; n++) {
251 ret = read_int32(f, &t->stts_sample_count[n]);
254 skip_bytes(f, 4); /* sample delta */
/*
 * Load first chunk and samples per chunk of every stsc entry into two arrays;
 * the sample description index of each entry is skipped.
 *
 * The statements which follow the two conditions below and the return paths
 * are not visible in this excerpt.
 */
259 static int read_stsc(struct mp4 *f)
262 struct mp4_track *t = &f->track;
264 if (t->state != ATS_SEEN_MP4A)
/* Either array being set means that an stsc atom was loaded before. */
266 if (t->stsc_first_chunk || t->stsc_samples_per_chunk)
268 skip_bytes(f, 4); /* version (1), flags (3) */
269 ret = read_int32(f, &t->stsc_entry_count);
/* NOTE(review): the entry count comes from the file; no upper bound is visible here. */
272 t->stsc_first_chunk = para_malloc(t->stsc_entry_count * sizeof(int32_t));
273 t->stsc_samples_per_chunk = para_malloc(t->stsc_entry_count
275 for (uint32_t n = 0; n < t->stsc_entry_count; n++) {
276 ret = read_int32(f, &t->stsc_first_chunk[n]);
279 ret = read_int32(f, &t->stsc_samples_per_chunk[n]);
282 skip_bytes(f, 4); /* sample desc index */
/*
 * Load the chunk offsets of the stco atom.
 *
 * The statement which follows the condition below and the return paths are
 * not visible in this excerpt.
 */
287 static int read_stco(struct mp4 *f)
290 struct mp4_track *t = &f->track;
/* Applies to any state other than ATS_SEEN_MP4A, and to an already loaded table. */
292 if (t->state != ATS_SEEN_MP4A || t->stco_chunk_offset)
294 skip_bytes(f, 4); /* version (1), flags (3) */
295 ret = read_int32(f, &t->stco_entry_count);
/* NOTE(review): the entry count comes from the file; no upper bound is visible here. */
298 t->stco_chunk_offset = para_malloc(t->stco_entry_count
300 for (uint32_t n = 0; n < t->stco_entry_count; n++) {
301 ret = read_int32(f, &t->stco_chunk_offset[n]);
/*
 * Walk the entries of the stsd atom. For an mp4a entry the track state becomes
 * ATS_SEEN_MP4A and channel count and sample rate are read.
 *
 * Several lines (declarations, error checks, the skips between the reads and
 * the update of the variable skip) are not visible in this excerpt.
 */
308 static int read_stsd(struct mp4 *f)
311 uint32_t entry_count;
/* The check concerns every state other than ATS_INITIAL. */
313 if (f->track.state != ATS_INITIAL)
315 skip_bytes(f, 4); /* version (1), flags (3) */
316 ret = read_int32(f, &entry_count);
319 for (uint32_t n = 0; n < entry_count; n++) {
/* Remember where this entry starts; the position is restored from skip below. */
320 uint64_t skip = get_position(f);
322 uint8_t atom_type = 0;
/* The header size is not needed here, hence the NULL argument. */
323 ret = atom_read_header(f, &atom_type, NULL, &size);
327 if (atom_type == ATOM_MP4A) {
328 f->track.state = ATS_SEEN_MP4A;
329 /* reserved (6), data reference index (2), reserved (8) */
331 ret = read_int16(f, &f->track.channel_count);
335 ret = read_int16(f, &f->track.sample_rate);
/* presumably skip was advanced by the entry size on a line not shown - TODO confirm */
339 set_position(f, skip);
/*
 * Map a tag atom type to the item name stored in the metadata structure.
 *
 * Any type other than the five listed ones yields "unknown". The returned
 * pointers refer to string literals, callers such as parse_tag() duplicate
 * them.
 */
344 static const char *get_metadata_name(uint8_t atom_type)
347 case ATOM_TITLE: return "title";
348 case ATOM_ARTIST: return "artist";
349 case ATOM_ALBUM: return "album";
350 case ATOM_DATE: return "date";
351 case ATOM_COMMENT: return "comment";
352 default: return "unknown";
/*
 * Parse the sub atoms of one tag atom and append a tag to f->meta for the
 * value found in a data atom.
 *
 * parent is the atom type of the enclosing tag atom and determines the item
 * name. Large parts of the function, including the loop head, are not visible
 * in this excerpt.
 */
356 static int parse_tag(struct mp4 *f, uint8_t parent, int32_t size)
359 uint64_t subsize, sumsize;
/* Tail of a loop header: move on to the end of the sub atom and account for its size. */
368 set_position(f, destpos), sumsize += subsize
371 uint8_t header_size = 0;
372 ret = atom_read_header(f, &atom_type, &header_size, &subsize);
/* subsize includes the header, so this is the offset just past the sub atom. */
375 destpos = get_position(f) + subsize - header_size;
376 if (atom_type != ATOM_DATA)
378 skip_bytes(f, 8); /* version (1), flags (3), reserved (4) */
379 ret = -E_MP4_CORRUPT;
/* The payload starts after the header and the eight bytes skipped above. */
380 if (subsize < header_size + 8 || subsize > UINT_MAX)
382 len = subsize - (header_size + 8);
/* One extra byte, presumably for a terminating NUL - TODO confirm. */
384 value = para_malloc(len + 1);
385 ret = read_data(f, value, len);
391 return -E_MP4_CORRUPT;
/* Grow the tag array by one element; the new tag is the last one. */
392 f->meta.tags = para_realloc(f->meta.tags, (f->meta.count + 1)
393 * sizeof(struct mp4_tag));
394 tag = f->meta.tags + f->meta.count;
/* The item name is a heap copy, mp4_close() frees it. */
395 tag->item = para_strdup(get_metadata_name(parent));
/*
 * Read time scale and duration of the track from the mdhd atom.
 *
 * Two layouts are handled: one with 64 bit time stamps and duration, and one
 * with 32 bit fields. The condition which selects between them and the error
 * checks are not visible in this excerpt.
 */
405 static int read_mdhd(struct mp4 *f)
409 struct mp4_track *t = &f->track;
/* The check concerns every state other than ATS_INITIAL. */
411 if (t->state != ATS_INITIAL)
/*
 * NOTE(review): a full 32 bit word is read into version. Other atoms in this
 * file describe the same four bytes as version (1) and flags (3).
 */
413 ret = read_int32(f, &version);
417 skip_bytes(f, 16); /* creation time (8), modification time (8) */
418 ret = read_int32(f, &t->time_scale);
421 ret = read_int64(f, &t->duration);
424 } else { //version == 0
427 skip_bytes(f, 8); /* creation time (4), modification time (4) */
428 ret = read_int32(f, &t->time_scale);
431 ret = read_int32(f, &temp);
/* An all-ones 32 bit duration is widened to an all-ones 64 bit duration. */
434 t->duration = (temp == (uint32_t) (-1))?
435 (uint64_t) (-1) : (uint64_t) (temp);
/*
 * Iterate over the sub atoms of the ilst atom and hand them to parse_tag().
 *
 * The selection of the atom types which are parsed, the update of sumsize
 * and the return paths are not visible in this excerpt.
 */
441 static int32_t read_ilst(struct mp4 *f, int32_t size)
444 uint64_t sumsize = 0;
/*
 * NOTE(review): size is signed and gets converted to uint64_t for this
 * comparison, so a negative size acts as a very large limit.
 */
446 while (sumsize < size) {
448 uint64_t subsize, destpos;
449 uint8_t header_size = 0;
450 ret = atom_read_header(f, &atom_type, &header_size, &subsize);
/* Offset just past the current sub atom. */
453 destpos = get_position(f) + subsize - header_size;
/* The 64 bit difference is narrowed to the int32_t parameter of parse_tag(). */
460 ret = parse_tag(f, atom_type, subsize - header_size);
464 set_position(f, destpos);
/*
 * Iterate over the sub atoms of the meta atom, record offset and size of the
 * ilst atom and parse it with read_ilst().
 *
 * The update of sumsize, the error checks and the return paths are not
 * visible in this excerpt.
 */
470 static int32_t read_meta(struct mp4 *f, uint64_t size)
473 uint64_t subsize, sumsize = 0;
475 uint8_t header_size = 0;
477 skip_bytes(f, 4); /* version (1), flags (3) */
/*
 * NOTE(review): header_size is still zero on the first pass, and the
 * subtraction is unsigned, so a size below four wraps to a huge limit.
 */
478 while (sumsize < (size - (header_size + 4))) {
479 ret = atom_read_header(f, &atom_type, &header_size, &subsize);
482 if (subsize <= header_size + 4)
484 if (atom_type == ATOM_ILST) {
/* Offset of the ilst header and size including it; both are used by modify_moov(). */
485 f->ilst_offset = get_position(f) - header_size;
486 f->ilst_size = subsize;
/* NOTE(review): the 64 bit difference is narrowed to the int32_t parameter. */
487 ret = read_ilst(f, subsize - (header_size + 4));
/* Skip to the end of the current sub atom. */
491 set_position(f, get_position(f) + subsize - header_size);
/*
 * Tell whether an atom of the given type has to be looked at.
 *
 * Judging from the comments below there are three groups: atoms needed for
 * every open, nothing further for meta-only opens, and atoms needed only for
 * regular opens. The case labels are not visible in this excerpt.
 */
497 static bool need_atom(uint8_t atom_type, bool meta_only)
499 /* these are needed in any case */
510 /* meta-only opens don't need anything else */
513 /* these are only required for regular opens */
525 /* parse atoms that are sub atoms of other atoms */
/*
 * total_size is the number of bytes to examine, starting at the current
 * position. meta_only is passed on to need_atom() and to recursive calls.
 *
 * The switch head, some case labels, the error checks and the return paths
 * are not visible in this excerpt.
 */
526 static int parse_sub_atoms(struct mp4 *f, uint64_t total_size, bool meta_only)
529 uint64_t dest, size, end = get_position(f) + total_size;
/* After each iteration the position is set to dest, the end of the atom just handled. */
531 for (dest = get_position(f); dest < end; set_position(f, dest)) {
532 uint8_t header_size, atom_type;
533 ret = atom_read_header(f, &atom_type, &header_size, &size);
537 return -E_MP4_CORRUPT;
/* size includes the header which has been consumed already. */
538 dest = get_position(f) + size - header_size;
/* A trak atom after the mp4a atom was seen: record the track change. */
539 if (atom_type == ATOM_TRAK && f->track.state == ATS_SEEN_MP4A) {
540 f->track.state = ATS_TRACK_CHANGE;
543 if (atom_type == ATOM_UDTA) {
/* Offset of the udta header, used by modify_moov(). */
544 f->udta_offset = get_position(f) - header_size;
547 if (!need_atom(atom_type, meta_only))
550 case ATOM_STSZ: ret = read_stsz(f); break;
551 case ATOM_STTS: ret = read_stts(f); break;
552 case ATOM_STSC: ret = read_stsc(f); break;
553 case ATOM_STCO: ret = read_stco(f); break;
554 case ATOM_STSD: ret = read_stsd(f); break;
555 case ATOM_MDHD: ret = read_mdhd(f); break;
/* Offset of the meta header, used by modify_moov(). */
557 f->meta_offset = get_position(f) - header_size;
559 ret = read_meta(f, size);
/* Recurse into the payload of the atom; the case label is not visible here. */
562 ret = parse_sub_atoms(f, size - header_size, meta_only);
/*
 * Allocate the mp4 structure, scan the top-level atoms and parse the moov
 * atom with parse_sub_atoms().
 *
 * meta_only is passed on to parse_sub_atoms(). The initialization of f->cb,
 * the error paths and the assignment to result are not visible in this
 * excerpt.
 */
570 static int open_file(const struct mp4_callback *cb, bool meta_only, struct mp4 **result)
574 uint8_t atom_type, header_size;
575 struct mp4 *f = para_calloc(sizeof(*f));
/* The loop ends as soon as atom_read_header() returns zero or a negative value. */
578 while ((ret = atom_read_header(f, &atom_type, &header_size, &size)) > 0) {
/* Remembered so that mp4_meta_update() can tell whether moov is the last atom. */
579 f->last_atom = atom_type;
580 if (atom_type != ATOM_MOOV || size <= header_size) { /* skip */
581 set_position(f, get_position(f) + size - header_size);
/* Offset of the moov header, used by modify_moov() and mp4_meta_update(). */
584 f->moov_offset = get_position(f) - header_size;
586 ret = parse_sub_atoms(f, size - header_size, meta_only);
/* Sanity checks on the values read from the mp4a entry. */
593 if (f->track.channel_count == 0)
595 ret = -E_MP4_BAD_SAMPLERATE;
596 if (f->track.sample_rate == 0)
/*
 * Open an mp4 file for decoding: all atoms are parsed (meta_only is false)
 * and the sample count of the stsz atom is checked.
 *
 * The error handling and the assignment to result are not visible in this
 * excerpt.
 */
606 int mp4_open_read(const struct mp4_callback *cb, struct mp4 **result)
612 ret = open_file(cb, false, &f);
/* Error code for the check below: a sample count of zero. */
615 ret = -E_MP4_BAD_SAMPLE_COUNT;
616 if (f->track.stsz_sample_count == 0)
/*
 * Release the sample tables of the track and the item and value strings of
 * all tags.
 *
 * Whether the tag array and f itself are freed as well cannot be seen in this
 * excerpt.
 */
625 void mp4_close(struct mp4 *f)
627 free(f->track.stsz_table);
628 free(f->track.stts_sample_count);
629 free(f->track.stsc_first_chunk);
630 free(f->track.stsc_samples_per_chunk);
631 free(f->track.stco_chunk_offset);
632 for (uint32_t n = 0; n < f->meta.count; n++) {
633 free(f->meta.tags[n].item);
634 free(f->meta.tags[n].value);
/*
 * Find the chunk which contains the given sample, using the stsc tables.
 *
 * The chunk number is stored in *chunk. The return value is computed as the
 * number of samples before the matching stsc entry plus the samples of the
 * full chunks of that entry which precede the result chunk.
 *
 * The second line of the parameter list, the assignment to chunk1 and the
 * else branch of the last condition are not visible in this excerpt.
 *
 * NOTE(review): with stsc_entry_count == 0 the access spc[k - 1] after the
 * loop reads element zero of an empty table unless a check which is not
 * visible here prevents this.
 */
640 static int32_t chunk_of_sample(const struct mp4 *f, int32_t sample,
643 const struct mp4_track *t = &f->track;
644 uint32_t *fc = t->stsc_first_chunk, *spc = t->stsc_samples_per_chunk;
645 uint32_t chunk1, chunk1samples, n, total, k;
/* total accumulates the samples covered by the stsc entries before entry k. */
647 for (k = 1, total = 0; k < t->stsc_entry_count; k++, total += n) {
648 n = (fc[k] - fc[k - 1]) * spc[k - 1]; /* number of samples */
/* NOTE(review): sample is signed and is converted to unsigned for this comparison. */
649 if (sample < total + n)
653 chunk1samples = spc[k - 1];
/* Guards the division below against a samples-per-chunk value of zero. */
654 if (chunk1samples != 0)
655 *chunk = (sample - total) / chunk1samples + chunk1;
658 return total + (*chunk - chunk1) * chunk1samples;
662 * Return the number of milliseconds of the audio track.
664 * \param f As returned by \ref mp4_open_read(), must not be NULL.
/*
 * The result is the duration in time scale units, multiplied by 1000 and
 * divided by the time scale.
 */
666 uint64_t mp4_get_duration(const struct mp4 *f)
668 const struct mp4_track *t = &f->track;
/* Avoids a division by zero; the value returned in this case is not visible here. */
670 if (t->time_scale == 0)
/*
 * NOTE(review): the multiplication wraps for large durations, for example for
 * the all-ones duration which read_mdhd() may store.
 */
672 return t->duration * 1000 / t->time_scale;
/*
 * Seek to the start of the given sample.
 *
 * The target is the offset of the chunk which contains the sample plus the
 * sizes of the samples of that chunk which precede it. Sample numbers at or
 * beyond the stsz sample count are rejected with an EINVAL based error code.
 *
 * The else branches and the return statement at the end are not visible in
 * this excerpt.
 */
675 int mp4_set_sample_position(struct mp4 *f, uint32_t sample)
677 const struct mp4_track *t = &f->track;
678 int32_t offset, chunk, chunk_sample;
679 uint32_t n, srs; /* sample range size */
681 if (sample >= t->stsz_sample_count)
682 return -ERRNO_TO_PARA_ERROR(EINVAL);
/* chunk_sample is the return value of chunk_of_sample(), chunk its output argument. */
683 chunk_sample = chunk_of_sample(f, sample, &chunk);
/* Constant sample size: a single multiplication suffices. */
684 if (t->stsz_sample_size > 0)
685 srs = (sample - chunk_sample) * t->stsz_sample_size;
/* Otherwise sum up the table entries of the preceding samples of the chunk. */
687 for (srs = 0, n = chunk_sample; n < sample; n++)
688 srs += t->stsz_table[n];
/*
 * Chunk numbers beyond the table fall back to the last chunk offset.
 * NOTE(review): chunk and offset are signed while the table and its entry
 * count are unsigned 32 bit values.
 */
690 if (t->stco_entry_count > 0 && chunk > t->stco_entry_count)
691 offset = t->stco_chunk_offset[t->stco_entry_count - 1];
692 else if (t->stco_entry_count > 0)
/* The table is indexed with chunk - 1. */
693 offset = t->stco_chunk_offset[chunk - 1];
696 set_position(f, offset + srs);
/*
 * Store the size of the given sample in *result.
 *
 * Sample numbers at or beyond the stsz sample count are rejected with an
 * EINVAL based error code. The return statement of the success path is not
 * visible in this excerpt.
 */
700 int mp4_get_sample_size(const struct mp4 *f, uint32_t sample, uint32_t *result)
702 const struct mp4_track *t = &f->track;
704 if (sample >= t->stsz_sample_count)
705 return -ERRNO_TO_PARA_ERROR(EINVAL);
/* A non-zero value applies to all samples, otherwise the table is consulted. */
706 if (t->stsz_sample_size != 0)
707 *result = t->stsz_sample_size;
709 *result = t->stsz_table[sample];
/* Return the sample rate which read_stsd() stored in the track structure. */
713 uint16_t mp4_get_sample_rate(const struct mp4 *f)
715 return f->track.sample_rate;
/* Return the channel count which read_stsd() stored in the track structure. */
718 uint16_t mp4_get_channel_count(const struct mp4 *f)
720 return f->track.channel_count;
/*
 * Sum up the sample counts of all stts entries.
 *
 * The declaration of total and the return statement are not visible in this
 * excerpt.
 */
723 uint32_t mp4_num_samples(const struct mp4 *f)
725 const struct mp4_track *t = &f->track;
728 for (uint32_t n = 0; n < t->stts_entry_count; n++)
729 total += t->stts_sample_count[n];
/*
 * Open an mp4 file for reading or updating the metadata only: open_file() is
 * called with meta_only set to true.
 *
 * The error handling and the assignment to result are not visible in this
 * excerpt.
 */
733 int mp4_open_meta(const struct mp4_callback *cb, struct mp4 **result)
736 int ret = open_file(cb, true, &f);
/* A size of zero is taken to mean that the atom was not found. */
740 if (f->udta_size == 0 || f->meta_size == 0 || f->ilst_size == 0) {
743 return -E_MP4_MISSING_ATOM;
750 * Return the metadata of an mp4 file.
752 * \param f As returned by either \ref mp4_open_read() or \ref mp4_open_meta().
754 * The caller is allowed to add, delete or modify the entries of the returned
755 * structure in order to pass the modified version to \ref mp4_meta_update().
/*
 * Only the signature is visible in this excerpt. The return type matches the
 * meta member of struct mp4; presumably a pointer to that member is returned
 * - TODO confirm.
 */
757 struct mp4_metadata *mp4_get_meta(struct mp4 *f)
762 /** Total length of an on-disk metadata tag. */
/*
 * The constant 24 matches the layout written by create_ilst(): the value
 * bytes start at offset 24, after the tag atom header (8), the data atom
 * header (8), the flags (4) and the reserved field (4).
 */
763 #define TAG_LEN(_len) (24 + (_len))
/*
 * Serialize the tags of meta to out, one tag atom with an embedded data atom
 * per tag.
 *
 * The handling of item names other than the five below and the advancement
 * of out are not visible in this excerpt. modify_moov() computes the size of
 * the output as the sum of TAG_LEN() over all tags.
 */
764 static void create_ilst(const struct mp4_metadata *meta, uint8_t *out)
766 for (unsigned n = 0; n < meta->count; n++) {
767 struct mp4_tag *tag = meta->tags + n;
/* The value is written without a terminating NUL byte. */
768 unsigned len = strlen(tag->value);
769 const char *atom_name;
/*
 * Item names are matched case-insensitively. The literals are split after
 * the hex escape so that following letters like "a" are not taken as
 * further hex digits of the escape sequence.
 */
771 if (!strcasecmp(tag->item, "title"))
772 atom_name = "\xA9" "nam";
773 else if (!strcasecmp(tag->item, "artist"))
774 atom_name = "\xA9" "ART";
775 else if (!strcasecmp(tag->item, "album"))
776 atom_name = "\xA9" "alb";
777 else if (!strcasecmp(tag->item, "date"))
778 atom_name = "\xA9" "day";
779 else if (!strcasecmp(tag->item, "comment"))
780 atom_name = "\xA9" "cmt";
/* Tag atom header: total size and four byte name. */
783 write_u32_be(out, TAG_LEN(len));
784 memcpy(out + 4, atom_name, 4);
/* Embedded data atom: size, name, flags, reserved, then the value. */
785 write_u32_be(out + 8, 8 /* data atom header */
786 + 8 /* flags + reserved */
788 memcpy(out + 12, "data", 4);
789 write_u32_be(out + 16, 1); /* flags */
790 write_u32_be(out + 20, 0); /* reserved */
791 memcpy(out + 24, tag->value, len);
/*
 * Build the payload of a new moov atom in which the ilst atom is replaced by
 * one created from the current tags of f.
 *
 * The old payload is copied piecewise. The size fields of the udta, meta and
 * ilst atoms are each rewritten as the old value plus size_delta. The size of
 * the new payload is stored in *out_size.
 *
 * Declarations, error checks, the advancement of p_out after the size and
 * name fields and the return statements are not visible in this excerpt.
 *
 * NOTE(review): the constant 8 is used as the header size of the moov, udta,
 * meta and ilst atoms although atom_read_header() also accepts headers with a
 * 64 bit size. TODO confirm that such files cannot reach this function.
 */
796 static void *modify_moov(struct mp4 *f, uint32_t *out_size)
/* Offset and size of the moov payload, i.e. without the 8 byte header. */
799 uint64_t total_base = f->moov_offset + 8;
800 uint32_t total_size = (uint32_t) (f->moov_size - 8);
801 uint32_t new_ilst_size = 0;
/* Payload size of the new ilst atom. */
807 for (unsigned n = 0; n < f->meta.count; n++)
808 new_ilst_size += TAG_LEN(strlen(f->meta.tags[n].value));
/* Difference between new and old ilst payload size. */
809 size_delta = new_ilst_size - (f->ilst_size - 8);
810 *out_size = total_size + size_delta;
811 out_buffer = para_malloc(*out_size);
/* Copy everything between the moov header and the udta atom. */
813 set_position(f, total_base);
814 ret = read_data(f, p_out, f->udta_offset - total_base);
817 p_out += f->udta_offset - total_base;
/* udta header: adjusted size, then four more bytes copied verbatim. */
818 ret = read_int32(f, &tmp);
821 write_u32_be(p_out, tmp + size_delta);
823 ret = read_data(f, p_out, 4);
/* Copy everything between the udta header and the meta atom. */
827 ret = read_data(f, p_out, f->meta_offset - f->udta_offset - 8);
830 p_out += f->meta_offset - f->udta_offset - 8;
/* meta header: adjusted size, then four more bytes copied verbatim. */
831 ret = read_int32(f, &tmp);
834 write_u32_be(p_out, tmp + size_delta);
836 ret = read_data(f, p_out, 4);
/* Copy everything between the meta header and the ilst atom. */
840 ret = read_data(f, p_out, f->ilst_offset - f->meta_offset - 8);
843 p_out += f->ilst_offset - f->meta_offset - 8;
/* ilst header: adjusted size, then four more bytes copied verbatim. */
844 ret = read_int32(f, &tmp);
847 write_u32_be(p_out, tmp + size_delta);
849 ret = read_data(f, p_out, 4);
/* Write the new tags, then continue reading right after the old ilst atom. */
853 create_ilst(&f->meta, p_out);
854 p_out += new_ilst_size;
855 set_position(f, f->ilst_offset + f->ilst_size);
856 ret = read_data(f, p_out, total_size - (f->ilst_offset - total_base)
/*
 * Write size bytes from data through the write callback of f.
 *
 * One visible return path converts errno to a negative error code. The
 * surrounding logic is not visible in this excerpt.
 */
863 static int write_data(struct mp4 *f, void *data, size_t size)
866 ssize_t ret = f->cb->write(f->cb->user_data, data, size);
870 return -ERRNO_TO_PARA_ERROR(errno);
/*
 * Write a moov atom which reflects the current tags of f.
 *
 * If moov is not the last top-level atom, the name of the old atom is
 * overwritten with "free" and the new atom is written at the end of the file.
 * Otherwise the old atom is overwritten in place. The truncate callback is
 * called after the payload has been written.
 *
 * Error checks, cleanup and the return statement are not visible in this
 * excerpt.
 */
877 int mp4_meta_update(struct mp4 *f)
880 uint32_t new_moov_size;
/* The four dashes are placeholders which are overwritten with the size below. */
881 uint8_t buf[8] = "----moov";
885 new_moov_data = modify_moov(f, &new_moov_size);
886 if (!new_moov_data ) {
890 if (f->last_atom != ATOM_MOOV) {
/* The name field starts four bytes into the atom header. */
891 set_position(f, f->moov_offset + 4);
892 ret = write_data(f, "free", 4); /* rename old moov to free */
895 /* write new moov atom at EOF */
/* NOTE(review): the result of this seek is not checked. */
896 f->cb->seek(f->cb->user_data, 0, SEEK_END);
897 } else /* overwrite old moov atom */
898 set_position(f, f->moov_offset);
/* The size field counts the 8 byte header as well. */
899 write_u32_be(buf, new_moov_size + 8);
900 ret = write_data(f, buf, sizeof(buf));
903 ret = write_data(f, new_moov_data, new_moov_size);
906 ret = f->cb->truncate(f->cb->user_data);
908 ret = -ERRNO_TO_PARA_ERROR(errno);
915 * Return the value of the given tag item.
917 * \param f Must not be NULL.
918 * \param item "artist", "title", "album", "comment", or "date".
920 * \return The function always returns NULL if the given item is not in the
921 * above list. Otherwise, if the file does not contain a tag for the given
922 * item, the function also returns NULL. Otherwise a copy of the tag value is
923 * returned and the caller should free this memory when it is no longer needed.
/*
 * The tags are searched in order and item names are compared without regard
 * to case. A heap copy of the value of the first match is returned. The
 * return statement for the case that nothing matches is not visible in this
 * excerpt.
 */
925 char *mp4_get_tag_value(const struct mp4 *f, const char *item)
927 for (unsigned n = 0; n < f->meta.count; n++)
928 if (!strcasecmp(f->meta.tags[n].item, item))
929 return para_strdup(f->meta.tags[n].value);