Libav
matroskadec.c
Go to the documentation of this file.
1 /*
2  * Matroska file demuxer
3  * Copyright (c) 2003-2008 The Libav Project
4  *
5  * This file is part of Libav.
6  *
7  * Libav is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * Libav is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with Libav; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
31 #include <stdio.h>
32 #include "avformat.h"
33 #include "internal.h"
34 #include "avio_internal.h"
35 /* For ff_codec_get_id(). */
36 #include "riff.h"
37 #include "isom.h"
38 #include "rmsipr.h"
39 #include "matroska.h"
40 #include "libavcodec/bytestream.h"
41 #include "libavcodec/mpeg4audio.h"
42 #include "libavutil/intfloat.h"
43 #include "libavutil/intreadwrite.h"
44 #include "libavutil/avstring.h"
45 #include "libavutil/lzo.h"
46 #include "libavutil/dict.h"
47 #include "libavutil/mathematics.h"
48 #if CONFIG_ZLIB
49 #include <zlib.h>
50 #endif
51 #if CONFIG_BZLIB
52 #include <bzlib.h>
53 #endif
54 
55 typedef enum {
66 } EbmlType;
67 
68 typedef const struct EbmlSyntax {
69  uint32_t id;
73  union {
74  uint64_t u;
75  double f;
76  const char *s;
77  const struct EbmlSyntax *n;
78  } def;
79 } EbmlSyntax;
80 
/* Growable array of parsed elements; ebml_parse_elem() appends one zeroed
 * entry each time a syntax entry with a non-zero list_elem_size is parsed. */
81 typedef struct {
82  int nb_elem;  /* number of entries currently stored in elem */
83  void *elem;   /* entry storage; the entry size comes from the syntax table */
84 } EbmlList;
85 
86 typedef struct {
87  int size;
89  int64_t pos;
90 } EbmlBin;
91 
/* Values read from the EBML header; the ebml_header syntax table in this
 * file maps header elements onto these fields and supplies their defaults. */
92 typedef struct {
93  uint64_t version;          /* EBML read version (table default: EBML_VERSION) */
94  uint64_t max_size;         /* maximum size-field length (table default: 8) */
95  uint64_t id_length;        /* maximum ID length (table default: 4) */
96  char *doctype;             /* document type string (table default: "(none)") */
97  uint64_t doctype_version;  /* doctype read version (table default: 1) */
98 } Ebml;
99 
100 typedef struct {
101  uint64_t algo;
104 
105 typedef struct {
106  uint64_t scope;
107  uint64_t type;
110 
111 typedef struct {
112  double frame_rate;
113  uint64_t display_width;
114  uint64_t display_height;
115  uint64_t pixel_width;
116  uint64_t pixel_height;
117  uint64_t fourcc;
119 
120 typedef struct {
121  double samplerate;
123  uint64_t bitdepth;
124  uint64_t channels;
125 
126  /* real audio header (extracted from extradata) */
132  int pkt_cnt;
133  uint64_t buf_timecode;
136 
137 typedef struct {
138  uint64_t num;
139  uint64_t uid;
140  uint64_t type;
141  char *name;
142  char *codec_id;
144  char *language;
145  double time_scale;
147  uint64_t flag_default;
148  uint64_t flag_forced;
152  uint64_t codec_delay;
153 
155  int64_t end_timecode;
157 } MatroskaTrack;
158 
159 typedef struct {
160  uint64_t uid;
161  char *filename;
162  char *mime;
164 
167 
168 typedef struct {
169  uint64_t start;
170  uint64_t end;
171  uint64_t uid;
172  char *title;
173 
176 
177 typedef struct {
178  uint64_t track;
179  uint64_t pos;
181 
182 typedef struct {
183  uint64_t time;
185 } MatroskaIndex;
186 
187 typedef struct {
188  char *name;
189  char *string;
190  char *lang;
191  uint64_t def;
193 } MatroskaTag;
194 
195 typedef struct {
196  char *type;
197  uint64_t typevalue;
198  uint64_t trackuid;
199  uint64_t chapteruid;
200  uint64_t attachuid;
202 
203 typedef struct {
206 } MatroskaTags;
207 
208 typedef struct {
209  uint64_t id;
210  uint64_t pos;
212 
/* One open master element on the parser's level stack. */
213 typedef struct {
214  uint64_t start;   /* stream position recorded by ebml_read_master() when the element was opened */
215  uint64_t length;  /* payload length of the element as passed to ebml_read_master() */
216 } MatroskaLevel;
217 
218 typedef struct {
219  uint64_t timecode;
222 
223 typedef struct {
225 
226  /* EBML stuff */
229  int level_up;
230  uint32_t current_id;
231 
232  uint64_t time_scale;
233  double duration;
234  char *title;
241 
242  /* byte position of the segment inside the stream */
243  int64_t segment_start;
244 
245  /* the packet queue */
249 
250  int done;
251 
252  /* What to skip before effectively reading a packet. */
255 
256  /* File has a CUES element, but we defer parsing until it is needed. */
258 
262 
263  /* File has SSA subtitles which prevent incremental cluster parsing. */
266 
267 typedef struct {
268  uint64_t duration;
269  int64_t reference;
270  uint64_t non_simple;
272 } MatroskaBlock;
273 
275  { EBML_ID_EBMLREADVERSION, EBML_UINT, 0, offsetof(Ebml,version), {.u=EBML_VERSION} },
276  { EBML_ID_EBMLMAXSIZELENGTH, EBML_UINT, 0, offsetof(Ebml,max_size), {.u=8} },
277  { EBML_ID_EBMLMAXIDLENGTH, EBML_UINT, 0, offsetof(Ebml,id_length), {.u=4} },
278  { EBML_ID_DOCTYPE, EBML_STR, 0, offsetof(Ebml,doctype), {.s="(none)"} },
279  { EBML_ID_DOCTYPEREADVERSION, EBML_UINT, 0, offsetof(Ebml,doctype_version), {.u=1} },
282  { 0 }
283 };
284 
286  { EBML_ID_HEADER, EBML_NEST, 0, 0, {.n=ebml_header} },
287  { 0 }
288 };
289 
291  { MATROSKA_ID_TIMECODESCALE, EBML_UINT, 0, offsetof(MatroskaDemuxContext,time_scale), {.u=1000000} },
293  { MATROSKA_ID_TITLE, EBML_UTF8, 0, offsetof(MatroskaDemuxContext,title) },
298  { 0 }
299 };
300 
302  { MATROSKA_ID_VIDEOFRAMERATE, EBML_FLOAT,0, offsetof(MatroskaTrackVideo,frame_rate) },
303  { MATROSKA_ID_VIDEODISPLAYWIDTH, EBML_UINT, 0, offsetof(MatroskaTrackVideo,display_width) },
304  { MATROSKA_ID_VIDEODISPLAYHEIGHT, EBML_UINT, 0, offsetof(MatroskaTrackVideo,display_height) },
305  { MATROSKA_ID_VIDEOPIXELWIDTH, EBML_UINT, 0, offsetof(MatroskaTrackVideo,pixel_width) },
306  { MATROSKA_ID_VIDEOPIXELHEIGHT, EBML_UINT, 0, offsetof(MatroskaTrackVideo,pixel_height) },
307  { MATROSKA_ID_VIDEOCOLORSPACE, EBML_UINT, 0, offsetof(MatroskaTrackVideo,fourcc) },
316  { 0 }
317 };
318 
320  { MATROSKA_ID_AUDIOSAMPLINGFREQ, EBML_FLOAT,0, offsetof(MatroskaTrackAudio,samplerate), {.f=8000.0} },
321  { MATROSKA_ID_AUDIOOUTSAMPLINGFREQ,EBML_FLOAT,0,offsetof(MatroskaTrackAudio,out_samplerate) },
323  { MATROSKA_ID_AUDIOCHANNELS, EBML_UINT, 0, offsetof(MatroskaTrackAudio,channels), {.u=1} },
324  { 0 }
325 };
326 
330  { 0 }
331 };
332 
334  { MATROSKA_ID_ENCODINGSCOPE, EBML_UINT, 0, offsetof(MatroskaTrackEncoding,scope), {.u=1} },
335  { MATROSKA_ID_ENCODINGTYPE, EBML_UINT, 0, offsetof(MatroskaTrackEncoding,type), {.u=0} },
336  { MATROSKA_ID_ENCODINGCOMPRESSION,EBML_NEST, 0, offsetof(MatroskaTrackEncoding,compression), {.n=matroska_track_encoding_compression} },
338  { 0 }
339 };
340 
342  { MATROSKA_ID_TRACKCONTENTENCODING, EBML_NEST, sizeof(MatroskaTrackEncoding), offsetof(MatroskaTrack,encodings), {.n=matroska_track_encoding} },
343  { 0 }
344 };
345 
347  { MATROSKA_ID_TRACKNUMBER, EBML_UINT, 0, offsetof(MatroskaTrack,num) },
349  { MATROSKA_ID_TRACKUID, EBML_UINT, 0, offsetof(MatroskaTrack,uid) },
350  { MATROSKA_ID_TRACKTYPE, EBML_UINT, 0, offsetof(MatroskaTrack,type) },
352  { MATROSKA_ID_CODECPRIVATE, EBML_BIN, 0, offsetof(MatroskaTrack,codec_priv) },
353  { MATROSKA_ID_CODECDELAY, EBML_UINT, 0, offsetof(MatroskaTrack, codec_delay) },
354  { MATROSKA_ID_TRACKLANGUAGE, EBML_UTF8, 0, offsetof(MatroskaTrack,language), {.s="eng"} },
355  { MATROSKA_ID_TRACKDEFAULTDURATION, EBML_UINT, 0, offsetof(MatroskaTrack,default_duration) },
356  { MATROSKA_ID_TRACKTIMECODESCALE, EBML_FLOAT,0, offsetof(MatroskaTrack,time_scale), {.f=1.0} },
357  { MATROSKA_ID_TRACKFLAGDEFAULT, EBML_UINT, 0, offsetof(MatroskaTrack,flag_default), {.u=1} },
358  { MATROSKA_ID_TRACKFLAGFORCED, EBML_UINT, 0, offsetof(MatroskaTrack,flag_forced), {.u=0} },
359  { MATROSKA_ID_TRACKVIDEO, EBML_NEST, 0, offsetof(MatroskaTrack,video), {.n=matroska_track_video} },
360  { MATROSKA_ID_TRACKAUDIO, EBML_NEST, 0, offsetof(MatroskaTrack,audio), {.n=matroska_track_audio} },
361  { MATROSKA_ID_TRACKCONTENTENCODINGS,EBML_NEST, 0, 0, {.n=matroska_track_encodings} },
371  { 0 }
372 };
373 
375  { MATROSKA_ID_TRACKENTRY, EBML_NEST, sizeof(MatroskaTrack), offsetof(MatroskaDemuxContext,tracks), {.n=matroska_track} },
376  { 0 }
377 };
378 
380  { MATROSKA_ID_FILEUID, EBML_UINT, 0, offsetof(MatroskaAttachement,uid) },
381  { MATROSKA_ID_FILENAME, EBML_UTF8, 0, offsetof(MatroskaAttachement,filename) },
382  { MATROSKA_ID_FILEMIMETYPE, EBML_STR, 0, offsetof(MatroskaAttachement,mime) },
383  { MATROSKA_ID_FILEDATA, EBML_BIN, 0, offsetof(MatroskaAttachement,bin) },
385  { 0 }
386 };
387 
389  { MATROSKA_ID_ATTACHEDFILE, EBML_NEST, sizeof(MatroskaAttachement), offsetof(MatroskaDemuxContext,attachments), {.n=matroska_attachment} },
390  { 0 }
391 };
392 
394  { MATROSKA_ID_CHAPSTRING, EBML_UTF8, 0, offsetof(MatroskaChapter,title) },
396  { 0 }
397 };
398 
402  { MATROSKA_ID_CHAPTERUID, EBML_UINT, 0, offsetof(MatroskaChapter,uid) },
403  { MATROSKA_ID_CHAPTERDISPLAY, EBML_NEST, 0, 0, {.n=matroska_chapter_display} },
408  { 0 }
409 };
410 
412  { MATROSKA_ID_CHAPTERATOM, EBML_NEST, sizeof(MatroskaChapter), offsetof(MatroskaDemuxContext,chapters), {.n=matroska_chapter_entry} },
417  { 0 }
418 };
419 
421  { MATROSKA_ID_EDITIONENTRY, EBML_NEST, 0, 0, {.n=matroska_chapter} },
422  { 0 }
423 };
424 
426  { MATROSKA_ID_CUETRACK, EBML_UINT, 0, offsetof(MatroskaIndexPos,track) },
429  { 0 }
430 };
431 
433  { MATROSKA_ID_CUETIME, EBML_UINT, 0, offsetof(MatroskaIndex,time) },
434  { MATROSKA_ID_CUETRACKPOSITION, EBML_NEST, sizeof(MatroskaIndexPos), offsetof(MatroskaIndex,pos), {.n=matroska_index_pos} },
435  { 0 }
436 };
437 
439  { MATROSKA_ID_POINTENTRY, EBML_NEST, sizeof(MatroskaIndex), offsetof(MatroskaDemuxContext,index), {.n=matroska_index_entry} },
440  { 0 }
441 };
442 
444  { MATROSKA_ID_TAGNAME, EBML_UTF8, 0, offsetof(MatroskaTag,name) },
445  { MATROSKA_ID_TAGSTRING, EBML_UTF8, 0, offsetof(MatroskaTag,string) },
446  { MATROSKA_ID_TAGLANG, EBML_STR, 0, offsetof(MatroskaTag,lang), {.s="und"} },
447  { MATROSKA_ID_TAGDEFAULT, EBML_UINT, 0, offsetof(MatroskaTag,def) },
448  { MATROSKA_ID_TAGDEFAULT_BUG, EBML_UINT, 0, offsetof(MatroskaTag,def) },
449  { MATROSKA_ID_SIMPLETAG, EBML_NEST, sizeof(MatroskaTag), offsetof(MatroskaTag,sub), {.n=matroska_simpletag} },
450  { 0 }
451 };
452 
455  { MATROSKA_ID_TAGTARGETS_TYPEVALUE, EBML_UINT, 0, offsetof(MatroskaTagTarget,typevalue), {.u=50} },
456  { MATROSKA_ID_TAGTARGETS_TRACKUID, EBML_UINT, 0, offsetof(MatroskaTagTarget,trackuid) },
458  { MATROSKA_ID_TAGTARGETS_ATTACHUID, EBML_UINT, 0, offsetof(MatroskaTagTarget,attachuid) },
459  { 0 }
460 };
461 
463  { MATROSKA_ID_SIMPLETAG, EBML_NEST, sizeof(MatroskaTag), offsetof(MatroskaTags,tag), {.n=matroska_simpletag} },
464  { MATROSKA_ID_TAGTARGETS, EBML_NEST, 0, offsetof(MatroskaTags,target), {.n=matroska_tagtargets} },
465  { 0 }
466 };
467 
469  { MATROSKA_ID_TAG, EBML_NEST, sizeof(MatroskaTags), offsetof(MatroskaDemuxContext,tags), {.n=matroska_tag} },
470  { 0 }
471 };
472 
474  { MATROSKA_ID_SEEKID, EBML_UINT, 0, offsetof(MatroskaSeekhead,id) },
475  { MATROSKA_ID_SEEKPOSITION, EBML_UINT, 0, offsetof(MatroskaSeekhead,pos), {.u=-1} },
476  { 0 }
477 };
478 
480  { MATROSKA_ID_SEEKENTRY, EBML_NEST, sizeof(MatroskaSeekhead), offsetof(MatroskaDemuxContext,seekhead), {.n=matroska_seekhead_entry} },
481  { 0 }
482 };
483 
485  { MATROSKA_ID_INFO, EBML_NEST, 0, 0, {.n=matroska_info } },
486  { MATROSKA_ID_TRACKS, EBML_NEST, 0, 0, {.n=matroska_tracks } },
487  { MATROSKA_ID_ATTACHMENTS, EBML_NEST, 0, 0, {.n=matroska_attachments} },
488  { MATROSKA_ID_CHAPTERS, EBML_NEST, 0, 0, {.n=matroska_chapters } },
489  { MATROSKA_ID_CUES, EBML_NEST, 0, 0, {.n=matroska_index } },
490  { MATROSKA_ID_TAGS, EBML_NEST, 0, 0, {.n=matroska_tags } },
491  { MATROSKA_ID_SEEKHEAD, EBML_NEST, 0, 0, {.n=matroska_seekhead } },
493  { 0 }
494 };
495 
497  { MATROSKA_ID_SEGMENT, EBML_NEST, 0, 0, {.n=matroska_segment } },
498  { 0 }
499 };
500 
502  { MATROSKA_ID_BLOCK, EBML_BIN, 0, offsetof(MatroskaBlock,bin) },
503  { MATROSKA_ID_SIMPLEBLOCK, EBML_BIN, 0, offsetof(MatroskaBlock,bin) },
505  { MATROSKA_ID_BLOCKREFERENCE, EBML_UINT, 0, offsetof(MatroskaBlock,reference) },
507  { 1, EBML_UINT, 0, offsetof(MatroskaBlock,non_simple), {.u=1} },
508  { 0 }
509 };
510 
512  { MATROSKA_ID_CLUSTERTIMECODE,EBML_UINT,0, offsetof(MatroskaCluster,timecode) },
513  { MATROSKA_ID_BLOCKGROUP, EBML_NEST, sizeof(MatroskaBlock), offsetof(MatroskaCluster,blocks), {.n=matroska_blockgroup} },
514  { MATROSKA_ID_SIMPLEBLOCK, EBML_PASS, sizeof(MatroskaBlock), offsetof(MatroskaCluster,blocks), {.n=matroska_blockgroup} },
517  { 0 }
518 };
519 
521  { MATROSKA_ID_CLUSTER, EBML_NEST, 0, 0, {.n=matroska_cluster} },
526  { 0 }
527 };
528 
530  { MATROSKA_ID_CLUSTERTIMECODE,EBML_UINT,0, offsetof(MatroskaCluster,timecode) },
531  { MATROSKA_ID_BLOCKGROUP, EBML_NEST, sizeof(MatroskaBlock), offsetof(MatroskaCluster,blocks), {.n=matroska_blockgroup} },
532  { MATROSKA_ID_SIMPLEBLOCK, EBML_PASS, sizeof(MatroskaBlock), offsetof(MatroskaCluster,blocks), {.n=matroska_blockgroup} },
540  { 0 }
541 };
542 
544  { MATROSKA_ID_CLUSTERTIMECODE,EBML_UINT,0, offsetof(MatroskaCluster,timecode) },
549  { 0 }
550 };
551 
553  { MATROSKA_ID_CLUSTER, EBML_NEST, 0, 0, {.n=matroska_cluster_incremental} },
558  { 0 }
559 };
560 
/* Document type strings the probe code in this file searches for inside the
 * EBML header bytes. */
561 static const char *const matroska_doctypes[] = { "matroska", "webm" };
562 
563 static int matroska_resync(MatroskaDemuxContext *matroska, int64_t last_pos)
564 {
565  AVIOContext *pb = matroska->ctx->pb;
566  uint32_t id;
567  matroska->current_id = 0;
568  matroska->num_levels = 0;
569 
570  /* seek to next position to resync from */
571  if (avio_seek(pb, last_pos + 1, SEEK_SET) < 0)
572  goto eof;
573 
574  id = avio_rb32(pb);
575 
576  // try to find a toplevel element
577  while (!pb->eof_reached) {
578  if (id == MATROSKA_ID_INFO || id == MATROSKA_ID_TRACKS ||
579  id == MATROSKA_ID_CUES || id == MATROSKA_ID_TAGS ||
581  id == MATROSKA_ID_CLUSTER || id == MATROSKA_ID_CHAPTERS) {
582  matroska->current_id = id;
583  return 0;
584  }
585  id = (id << 8) | avio_r8(pb);
586  }
587 eof:
588  matroska->done = 1;
589  return AVERROR_EOF;
590 }
591 
592 /*
593  * Return: Whether we reached the end of a level in the hierarchy or not.
594  */
596 {
597  AVIOContext *pb = matroska->ctx->pb;
598  int64_t pos = avio_tell(pb);
599 
600  if (matroska->num_levels > 0) {
601  MatroskaLevel *level = &matroska->levels[matroska->num_levels - 1];
602  if (pos - level->start >= level->length || matroska->current_id) {
603  matroska->num_levels--;
604  return 1;
605  }
606  }
607  return 0;
608 }
609 
610 /*
611  * Read: an "EBML number", which is defined as a variable-length
612  * array of bytes. The first byte indicates the length by giving a
613  * number of 0-bits followed by a one. The position of the first
614  * "one" bit inside the first byte indicates the length of this
615  * number.
616  * Returns: number of bytes read, < 0 on error
617  */
619  int max_size, uint64_t *number)
620 {
621  int read = 1, n = 1;
622  uint64_t total = 0;
623 
624  /* The first byte tells us the length in bytes - avio_r8() can normally
625  * return 0, but since that's not a valid first ebmlID byte, we can
626  * use it safely here to catch EOS. */
627  if (!(total = avio_r8(pb))) {
628  /* we might encounter EOS here */
629  if (!pb->eof_reached) {
630  int64_t pos = avio_tell(pb);
631  av_log(matroska->ctx, AV_LOG_ERROR,
632  "Read error at pos. %"PRIu64" (0x%"PRIx64")\n",
633  pos, pos);
634  return pb->error ? pb->error : AVERROR(EIO);
635  }
636  return AVERROR_EOF;
637  }
638 
639  /* get the length of the EBML number */
640  read = 8 - ff_log2_tab[total];
641  if (read > max_size) {
642  int64_t pos = avio_tell(pb) - 1;
643  av_log(matroska->ctx, AV_LOG_ERROR,
644  "Invalid EBML number size tag 0x%02x at pos %"PRIu64" (0x%"PRIx64")\n",
645  (uint8_t) total, pos, pos);
646  return AVERROR_INVALIDDATA;
647  }
648 
649  /* read out length */
650  total ^= 1 << ff_log2_tab[total];
651  while (n++ < read)
652  total = (total << 8) | avio_r8(pb);
653 
654  *number = total;
655 
656  return read;
657 }
658 
665  uint64_t *number)
666 {
667  int res = ebml_read_num(matroska, pb, 8, number);
668  if (res > 0 && *number + 1 == 1ULL << (7 * res))
669  *number = 0xffffffffffffffULL;
670  return res;
671 }
672 
673 /*
674  * Read the next element as an unsigned int.
675  * 0 is success, < 0 is failure.
676  */
677 static int ebml_read_uint(AVIOContext *pb, int size, uint64_t *num)
678 {
679  int n = 0;
680 
681  if (size > 8)
682  return AVERROR_INVALIDDATA;
683 
684  /* big-endian ordering; build up number */
685  *num = 0;
686  while (n++ < size)
687  *num = (*num << 8) | avio_r8(pb);
688 
689  return 0;
690 }
691 
692 /*
693  * Read the next element as a float.
694  * 0 is success, < 0 is failure.
695  */
696 static int ebml_read_float(AVIOContext *pb, int size, double *num)
697 {
698  if (size == 0) {
699  *num = 0;
700  } else if (size == 4) {
701  *num = av_int2float(avio_rb32(pb));
702  } else if (size == 8){
703  *num = av_int2double(avio_rb64(pb));
704  } else
705  return AVERROR_INVALIDDATA;
706 
707  return 0;
708 }
709 
710 /*
711  * Read the next element as an ASCII string.
712  * 0 is success, < 0 is failure.
713  */
714 static int ebml_read_ascii(AVIOContext *pb, int size, char **str)
715 {
716  char *res;
717 
718  /* EBML strings are usually not 0-terminated, so we allocate one
719  * byte more, read the string and NULL-terminate it ourselves. */
720  if (!(res = av_malloc(size + 1)))
721  return AVERROR(ENOMEM);
722  if (avio_read(pb, (uint8_t *) res, size) != size) {
723  av_free(res);
724  return AVERROR(EIO);
725  }
726  (res)[size] = '\0';
727  av_free(*str);
728  *str = res;
729 
730  return 0;
731 }
732 
733 /*
734  * Read the next element as binary data.
735  * 0 is success, < 0 is failure.
736  */
737 static int ebml_read_binary(AVIOContext *pb, int length, EbmlBin *bin)
738 {
739  av_free(bin->data);
740  if (!(bin->data = av_malloc(length + FF_INPUT_BUFFER_PADDING_SIZE)))
741  return AVERROR(ENOMEM);
742 
743  memset(bin->data + length, 0, FF_INPUT_BUFFER_PADDING_SIZE);
744 
745  bin->size = length;
746  bin->pos = avio_tell(pb);
747  if (avio_read(pb, bin->data, length) != length) {
748  av_freep(&bin->data);
749  return AVERROR(EIO);
750  }
751 
752  return 0;
753 }
754 
755 /*
756  * Read the next element, but only the header. The contents
757  * are supposed to be sub-elements which can be read separately.
758  * 0 is success, < 0 is failure.
759  */
760 static int ebml_read_master(MatroskaDemuxContext *matroska, uint64_t length)
761 {
762  AVIOContext *pb = matroska->ctx->pb;
764 
765  if (matroska->num_levels >= EBML_MAX_DEPTH) {
766  av_log(matroska->ctx, AV_LOG_ERROR,
767  "File moves beyond max. allowed depth (%d)\n", EBML_MAX_DEPTH);
768  return AVERROR(ENOSYS);
769  }
770 
771  level = &matroska->levels[matroska->num_levels++];
772  level->start = avio_tell(pb);
773  level->length = length;
774 
775  return 0;
776 }
777 
778 /*
779  * Read signed/unsigned "EBML" numbers.
780  * Return: number of bytes processed, < 0 on error
781  */
783  uint8_t *data, uint32_t size, uint64_t *num)
784 {
785  AVIOContext pb;
786  ffio_init_context(&pb, data, size, 0, NULL, NULL, NULL, NULL);
787  return ebml_read_num(matroska, &pb, FFMIN(size, 8), num);
788 }
789 
790 /*
791  * Same as above, but signed.
792  */
794  uint8_t *data, uint32_t size, int64_t *num)
795 {
796  uint64_t unum;
797  int res;
798 
799  /* read as unsigned number first */
800  if ((res = matroska_ebmlnum_uint(matroska, data, size, &unum)) < 0)
801  return res;
802 
803  /* make signed (weird way) */
804  *num = unum - ((1LL << (7*res - 1)) - 1);
805 
806  return res;
807 }
808 
809 static int ebml_parse_elem(MatroskaDemuxContext *matroska,
810  EbmlSyntax *syntax, void *data);
811 
812 static int ebml_parse_id(MatroskaDemuxContext *matroska, EbmlSyntax *syntax,
813  uint32_t id, void *data)
814 {
815  int i;
816  for (i=0; syntax[i].id; i++)
817  if (id == syntax[i].id)
818  break;
819  if (!syntax[i].id && id == MATROSKA_ID_CLUSTER &&
820  matroska->num_levels > 0 &&
821  matroska->levels[matroska->num_levels-1].length == 0xffffffffffffff)
822  return 0; // we reached the end of an unknown size cluster
823  if (!syntax[i].id && id != EBML_ID_VOID && id != EBML_ID_CRC32) {
824  av_log(matroska->ctx, AV_LOG_INFO, "Unknown entry 0x%X\n", id);
825  if (matroska->ctx->error_recognition & AV_EF_EXPLODE)
826  return AVERROR_INVALIDDATA;
827  }
828  return ebml_parse_elem(matroska, &syntax[i], data);
829 }
830 
831 static int ebml_parse(MatroskaDemuxContext *matroska, EbmlSyntax *syntax,
832  void *data)
833 {
834  if (!matroska->current_id) {
835  uint64_t id;
836  int res = ebml_read_num(matroska, matroska->ctx->pb, 4, &id);
837  if (res < 0)
838  return res;
839  matroska->current_id = id | 1 << 7*res;
840  }
841  return ebml_parse_id(matroska, syntax, matroska->current_id, data);
842 }
843 
844 static int ebml_parse_nest(MatroskaDemuxContext *matroska, EbmlSyntax *syntax,
845  void *data)
846 {
847  int i, res = 0;
848 
849  for (i=0; syntax[i].id; i++)
850  switch (syntax[i].type) {
851  case EBML_UINT:
852  *(uint64_t *)((char *)data+syntax[i].data_offset) = syntax[i].def.u;
853  break;
854  case EBML_FLOAT:
855  *(double *)((char *)data+syntax[i].data_offset) = syntax[i].def.f;
856  break;
857  case EBML_STR:
858  case EBML_UTF8:
859  // the default may be NULL
860  if (syntax[i].def.s) {
861  uint8_t **dst = (uint8_t**)((uint8_t*)data + syntax[i].data_offset);
862  *dst = av_strdup(syntax[i].def.s);
863  if (!*dst)
864  return AVERROR(ENOMEM);
865  }
866  break;
867  }
868 
869  while (!res && !ebml_level_end(matroska))
870  res = ebml_parse(matroska, syntax, data);
871 
872  return res;
873 }
874 
876  EbmlSyntax *syntax, void *data)
877 {
878  static const uint64_t max_lengths[EBML_TYPE_COUNT] = {
879  [EBML_UINT] = 8,
880  [EBML_FLOAT] = 8,
881  // max. 16 MB for strings
882  [EBML_STR] = 0x1000000,
883  [EBML_UTF8] = 0x1000000,
884  // max. 256 MB for binary data
885  [EBML_BIN] = 0x10000000,
886  // no limits for anything else
887  };
888  AVIOContext *pb = matroska->ctx->pb;
889  uint32_t id = syntax->id;
890  uint64_t length;
891  int res;
892 
893  data = (char *)data + syntax->data_offset;
894  if (syntax->list_elem_size) {
895  EbmlList *list = data;
896  if ((res = av_reallocp_array(&list->elem,
897  list->nb_elem + 1,
898  syntax->list_elem_size)) < 0) {
899  list->nb_elem = 0;
900  return res;
901  }
902  data = (char*)list->elem + list->nb_elem*syntax->list_elem_size;
903  memset(data, 0, syntax->list_elem_size);
904  list->nb_elem++;
905  }
906 
907  if (syntax->type != EBML_PASS && syntax->type != EBML_STOP) {
908  matroska->current_id = 0;
909  if ((res = ebml_read_length(matroska, pb, &length)) < 0)
910  return res;
911  if (max_lengths[syntax->type] && length > max_lengths[syntax->type]) {
912  av_log(matroska->ctx, AV_LOG_ERROR,
913  "Invalid length 0x%"PRIx64" > 0x%"PRIx64" for syntax element %i\n",
914  length, max_lengths[syntax->type], syntax->type);
915  return AVERROR_INVALIDDATA;
916  }
917  }
918 
919  switch (syntax->type) {
920  case EBML_UINT: res = ebml_read_uint (pb, length, data); break;
921  case EBML_FLOAT: res = ebml_read_float (pb, length, data); break;
922  case EBML_STR:
923  case EBML_UTF8: res = ebml_read_ascii (pb, length, data); break;
924  case EBML_BIN: res = ebml_read_binary(pb, length, data); break;
925  case EBML_NEST: if ((res=ebml_read_master(matroska, length)) < 0)
926  return res;
927  if (id == MATROSKA_ID_SEGMENT)
928  matroska->segment_start = avio_tell(matroska->ctx->pb);
929  return ebml_parse_nest(matroska, syntax->def.n, data);
930  case EBML_PASS: return ebml_parse_id(matroska, syntax->def.n, id, data);
931  case EBML_STOP: return 1;
932  default: return avio_skip(pb,length)<0 ? AVERROR(EIO) : 0;
933  }
934  if (res == AVERROR_INVALIDDATA)
935  av_log(matroska->ctx, AV_LOG_ERROR, "Invalid element\n");
936  else if (res == AVERROR(EIO))
937  av_log(matroska->ctx, AV_LOG_ERROR, "Read error\n");
938  return res;
939 }
940 
941 static void ebml_free(EbmlSyntax *syntax, void *data)
942 {
943  int i, j;
944  for (i=0; syntax[i].id; i++) {
945  void *data_off = (char *)data + syntax[i].data_offset;
946  switch (syntax[i].type) {
947  case EBML_STR:
948  case EBML_UTF8: av_freep(data_off); break;
949  case EBML_BIN: av_freep(&((EbmlBin *)data_off)->data); break;
950  case EBML_NEST:
951  if (syntax[i].list_elem_size) {
952  EbmlList *list = data_off;
953  char *ptr = list->elem;
954  for (j=0; j<list->nb_elem; j++, ptr+=syntax[i].list_elem_size)
955  ebml_free(syntax[i].def.n, ptr);
956  av_free(list->elem);
957  } else
958  ebml_free(syntax[i].def.n, data_off);
959  default: break;
960  }
961  }
962 }
963 
964 
965 /*
966  * Autodetecting...
967  */
969 {
970  uint64_t total = 0;
971  int len_mask = 0x80, size = 1, n = 1, i;
972 
973  /* EBML header? */
974  if (AV_RB32(p->buf) != EBML_ID_HEADER)
975  return 0;
976 
977  /* length of header */
978  total = p->buf[4];
979  while (size <= 8 && !(total & len_mask)) {
980  size++;
981  len_mask >>= 1;
982  }
983  if (size > 8)
984  return 0;
985  total &= (len_mask - 1);
986  while (n < size)
987  total = (total << 8) | p->buf[4 + n++];
988 
989  /* Does the probe data contain the whole header? */
990  if (p->buf_size < 4 + size + total)
991  return 0;
992 
993  /* The header should contain a known document type. For now,
994  * we don't parse the whole header but simply check for the
995  * availability of that array of characters inside the header.
996  * Not fully fool-proof, but good enough. */
997  for (i = 0; i < FF_ARRAY_ELEMS(matroska_doctypes); i++) {
998  int probelen = strlen(matroska_doctypes[i]);
999  if (total < probelen)
1000  continue;
1001  for (n = 4+size; n <= 4+size+total-probelen; n++)
1002  if (!memcmp(p->buf+n, matroska_doctypes[i], probelen))
1003  return AVPROBE_SCORE_MAX;
1004  }
1005 
1006  // probably valid EBML header but no recognized doctype
1007  return AVPROBE_SCORE_EXTENSION;
1008 }
1009 
1011  int num)
1012 {
1013  MatroskaTrack *tracks = matroska->tracks.elem;
1014  int i;
1015 
1016  for (i=0; i < matroska->tracks.nb_elem; i++)
1017  if (tracks[i].num == num)
1018  return &tracks[i];
1019 
1020  av_log(matroska->ctx, AV_LOG_ERROR, "Invalid track number %d\n", num);
1021  return NULL;
1022 }
1023 
/*
 * Undo the first content encoding (encodings[0]) of a track for one buffer.
 *
 * On success the decoded data is placed in a newly allocated buffer,
 * *buf and *buf_size are updated to describe it, and 0 is returned. The
 * branch that prepends the stored settings bytes returns 0 with *buf left
 * untouched when those settings are empty. Inputs of 10000000 bytes or more
 * and unhandled algorithms are rejected with AVERROR_INVALIDDATA. On the
 * `failed` path the scratch buffer is freed and `result` is returned.
 *
 * NOTE(review): the `case` labels of the switch are not visible in this
 * listing; each section below handles one compression algorithm.
 * NOTE(review): the previous *buf is not freed here - presumably the caller
 * still owns it; confirm against the call sites.
 */
1024 static int matroska_decode_buffer(uint8_t** buf, int* buf_size,
1025  MatroskaTrack *track)
1026 {
1027  MatroskaTrackEncoding *encodings = track->encodings.elem;
1028  uint8_t* data = *buf;
1029  int isize = *buf_size;
1030  uint8_t* pkt_data = NULL;
1031  uint8_t av_unused *newpktdata;
1032  int pkt_size = isize;
1033  int result = 0;
1034  int olen;
1035 
1036  if (pkt_size >= 10000000)
1037  return AVERROR_INVALIDDATA;
1038 
1039  switch (encodings[0].compression.algo) {
 /* Settings bytes are prepended to the payload to rebuild the packet. */
1041  int header_size = encodings[0].compression.settings.size;
1042  uint8_t *header = encodings[0].compression.settings.data;
1043 
1044  if (!header_size)
1045  return 0;
1046 
1047  pkt_size = isize + header_size;
1048  pkt_data = av_malloc(pkt_size);
1049  if (!pkt_data)
1050  return AVERROR(ENOMEM);
1051 
1052  memcpy(pkt_data, header, header_size);
1053  memcpy(pkt_data + header_size, data, isize);
1054  break;
1055  }
1056 #if CONFIG_LZO
 /* Triple the output buffer and retry until the decoder stops reporting
  * AV_LZO_OUTPUT_FULL or the 10000000-byte cap is reached. */
1058  do {
1059  olen = pkt_size *= 3;
1060  newpktdata = av_realloc(pkt_data, pkt_size + AV_LZO_OUTPUT_PADDING);
1061  if (!newpktdata) {
1062  result = AVERROR(ENOMEM);
1063  goto failed;
1064  }
1065  pkt_data = newpktdata;
1066  result = av_lzo1x_decode(pkt_data, &olen, data, &isize);
1067  } while (result==AV_LZO_OUTPUT_FULL && pkt_size<10000000);
1068  if (result) {
1069  result = AVERROR_INVALIDDATA;
1070  goto failed;
1071  }
 /* presumably olen now holds the unused output space - TODO confirm */
1072  pkt_size -= olen;
1073  break;
1074 #endif
1075 #if CONFIG_ZLIB
1077  z_stream zstream = {0};
 /* NOTE(review): returns a bare -1 rather than an AVERROR code. */
1078  if (inflateInit(&zstream) != Z_OK)
1079  return -1;
1080  zstream.next_in = data;
1081  zstream.avail_in = isize;
1082  do {
1083  pkt_size *= 3;
1084  newpktdata = av_realloc(pkt_data, pkt_size);
1085  if (!newpktdata) {
1086  inflateEnd(&zstream);
 /* NOTE(review): `result` is not set to an error here; it still holds
  * its initial 0 or the last inflate() status (Z_OK), and that value
  * is what the `failed` path returns. */
1087  goto failed;
1088  }
1089  pkt_data = newpktdata;
1090  zstream.avail_out = pkt_size - zstream.total_out;
1091  zstream.next_out = pkt_data + zstream.total_out;
1092  result = inflate(&zstream, Z_NO_FLUSH);
1093  } while (result==Z_OK && pkt_size<10000000);
1094  pkt_size = zstream.total_out;
1095  inflateEnd(&zstream);
1096  if (result != Z_STREAM_END) {
1097  if (result == Z_MEM_ERROR)
1098  result = AVERROR(ENOMEM);
1099  else
1100  result = AVERROR_INVALIDDATA;
1101  goto failed;
1102  }
1103  break;
1104  }
1105 #endif
1106 #if CONFIG_BZLIB
1108  bz_stream bzstream = {0};
 /* NOTE(review): returns a bare -1 rather than an AVERROR code. */
1109  if (BZ2_bzDecompressInit(&bzstream, 0, 0) != BZ_OK)
1110  return -1;
1111  bzstream.next_in = data;
1112  bzstream.avail_in = isize;
1113  do {
1114  pkt_size *= 3;
1115  newpktdata = av_realloc(pkt_data, pkt_size);
1116  if (!newpktdata) {
1117  BZ2_bzDecompressEnd(&bzstream);
 /* NOTE(review): as in the zlib section, `result` is not set to an
  * error before jumping to `failed`. */
1118  goto failed;
1119  }
1120  pkt_data = newpktdata;
1121  bzstream.avail_out = pkt_size - bzstream.total_out_lo32;
1122  bzstream.next_out = pkt_data + bzstream.total_out_lo32;
1123  result = BZ2_bzDecompress(&bzstream);
1124  } while (result==BZ_OK && pkt_size<10000000);
1125  pkt_size = bzstream.total_out_lo32;
1126  BZ2_bzDecompressEnd(&bzstream);
1127  if (result != BZ_STREAM_END) {
1128  if (result == BZ_MEM_ERROR)
1129  result = AVERROR(ENOMEM);
1130  else
1131  result = AVERROR_INVALIDDATA;
1132  goto failed;
1133  }
1134  break;
1135  }
1136 #endif
1137  default:
1138  return AVERROR_INVALIDDATA;
1139  }
1140 
 /* Hand the decoded buffer back through the in/out parameters. */
1141  *buf = pkt_data;
1142  *buf_size = pkt_size;
1143  return 0;
1144  failed:
1145  av_free(pkt_data);
1146  return result;
1147 }
1148 
1150  AVPacket *pkt, uint64_t display_duration)
1151 {
1152  AVBufferRef *line;
1153  char *layer, *ptr = pkt->data, *end = ptr+pkt->size;
1154  for (; *ptr!=',' && ptr<end-1; ptr++);
1155  if (*ptr == ',')
1156  layer = ++ptr;
1157  for (; *ptr!=',' && ptr<end-1; ptr++);
1158  if (*ptr == ',') {
1159  int64_t end_pts = pkt->pts + display_duration;
1160  int sc = matroska->time_scale * pkt->pts / 10000000;
1161  int ec = matroska->time_scale * end_pts / 10000000;
1162  int sh, sm, ss, eh, em, es, len;
1163  sh = sc/360000; sc -= 360000*sh;
1164  sm = sc/ 6000; sc -= 6000*sm;
1165  ss = sc/ 100; sc -= 100*ss;
1166  eh = ec/360000; ec -= 360000*eh;
1167  em = ec/ 6000; ec -= 6000*em;
1168  es = ec/ 100; ec -= 100*es;
1169  *ptr++ = '\0';
1170  len = 50 + end-ptr + FF_INPUT_BUFFER_PADDING_SIZE;
1171  if (!(line = av_buffer_alloc(len)))
1172  return;
1173  snprintf(line->data, len,"Dialogue: %s,%d:%02d:%02d.%02d,%d:%02d:%02d.%02d,%s\r\n",
1174  layer, sh, sm, ss, sc, eh, em, es, ec, ptr);
1175  av_buffer_unref(&pkt->buf);
1176  pkt->buf = line;
1177  pkt->data = line->data;
1178  pkt->size = strlen(line->data);
1179  }
1180 }
1181 
1183 {
1184  int old_size = out->size;
1185  int ret = av_grow_packet(out, in->size);
1186  if (ret < 0)
1187  return ret;
1188 
1189  memcpy(out->data + old_size, in->data, in->size);
1190 
1191  av_free_packet(in);
1192  av_free(in);
1193  return 0;
1194 }
1195 
1197  AVDictionary **metadata, char *prefix)
1198 {
1199  MatroskaTag *tags = list->elem;
1200  char key[1024];
1201  int i;
1202 
1203  for (i=0; i < list->nb_elem; i++) {
1204  const char *lang = tags[i].lang && strcmp(tags[i].lang, "und") ?
1205  tags[i].lang : NULL;
1206 
1207  if (!tags[i].name) {
1208  av_log(s, AV_LOG_WARNING, "Skipping invalid tag with no TagName.\n");
1209  continue;
1210  }
1211  if (prefix) snprintf(key, sizeof(key), "%s/%s", prefix, tags[i].name);
1212  else av_strlcpy(key, tags[i].name, sizeof(key));
1213  if (tags[i].def || !lang) {
1214  av_dict_set(metadata, key, tags[i].string, 0);
1215  if (tags[i].sub.nb_elem)
1216  matroska_convert_tag(s, &tags[i].sub, metadata, key);
1217  }
1218  if (lang) {
1219  av_strlcat(key, "-", sizeof(key));
1220  av_strlcat(key, lang, sizeof(key));
1221  av_dict_set(metadata, key, tags[i].string, 0);
1222  if (tags[i].sub.nb_elem)
1223  matroska_convert_tag(s, &tags[i].sub, metadata, key);
1224  }
1225  }
1227 }
1228 
/*
 * NOTE(review): the function header (listing line 1229) is absent from this
 * extract; the body uses an `s` whose priv_data is the demuxer context.
 *
 * Distributes every parsed tag group to the metadata dictionary of its
 * target. The first non-zero UID in the target decides the destination:
 * attachment UID, then chapter UID, then track UID. A matching entry is
 * only used when its stream/chapter pointer is set. Groups with no UID go
 * to the container-level metadata, with the target type as key prefix.
 */
1230 {
1231  MatroskaDemuxContext *matroska = s->priv_data;
1232  MatroskaTags *tags = matroska->tags.elem;
1233  int i, j;
1234 
1235  for (i=0; i < matroska->tags.nb_elem; i++) {
1236  if (tags[i].target.attachuid) {
1237  MatroskaAttachement *attachment = matroska->attachments.elem;
1238  for (j=0; j<matroska->attachments.nb_elem; j++)
1239  if (attachment[j].uid == tags[i].target.attachuid
1240  && attachment[j].stream)
1241  matroska_convert_tag(s, &tags[i].tag,
1242  &attachment[j].stream->metadata, NULL);
1243  } else if (tags[i].target.chapteruid) {
1244  MatroskaChapter *chapter = matroska->chapters.elem;
1245  for (j=0; j<matroska->chapters.nb_elem; j++)
1246  if (chapter[j].uid == tags[i].target.chapteruid
1247  && chapter[j].chapter)
1248  matroska_convert_tag(s, &tags[i].tag,
1249  &chapter[j].chapter->metadata, NULL);
1250  } else if (tags[i].target.trackuid) {
1251  MatroskaTrack *track = matroska->tracks.elem;
1252  for (j=0; j<matroska->tracks.nb_elem; j++)
1253  if (track[j].uid == tags[i].target.trackuid && track[j].stream)
1254  matroska_convert_tag(s, &tags[i].tag,
1255  &track[j].stream->metadata, NULL);
1256  } else {
      /* No UID given: treat as a file-level tag. */
1257  matroska_convert_tag(s, &tags[i].tag, &s->metadata,
1258  tags[i].target.type);
1259  }
1260  }
1261 }
1262 
/*
 * NOTE(review): the function header (listing line 1263) and the declaration
 * of `level` (listing line 1270) are absent from this extract. The caller
 * below passes (matroska, index) and tests the result for < 0.
 *
 * Parses the element referenced by seek-head entry `idx`: seeks to the
 * entry's position relative to the segment start, parses there with the
 * segment syntax under a temporary dummy level, then seeks back and
 * restores level_up and current_id.
 *
 * Returns 0 without doing anything when idx is out of range or the entry
 * refers to a seek head or a cluster. Returns AVERROR_INVALIDDATA when the
 * level stack is already full. Otherwise returns the ebml_parse() result,
 * or 0 if the initial seek did not land on the requested offset.
 */
1264 {
1265  EbmlList *seekhead_list = &matroska->seekhead;
1266  MatroskaSeekhead *seekhead = seekhead_list->elem;
1267  uint32_t level_up = matroska->level_up;
1268  int64_t before_pos = avio_tell(matroska->ctx->pb);
1269  uint32_t saved_id = matroska->current_id;
1271  int64_t offset;
1272  int ret = 0;
1273 
1274  if (idx >= seekhead_list->nb_elem
1275  || seekhead[idx].id == MATROSKA_ID_SEEKHEAD
1276  || seekhead[idx].id == MATROSKA_ID_CLUSTER)
1277  return 0;
1278 
1279  /* seek */
1280  offset = seekhead[idx].pos + matroska->segment_start;
1281  if (avio_seek(matroska->ctx->pb, offset, SEEK_SET) == offset) {
1282  /* We don't want to lose our seekhead level, so we add
1283  * a dummy. This is a crude hack. */
1284  if (matroska->num_levels == EBML_MAX_DEPTH) {
1285  av_log(matroska->ctx, AV_LOG_INFO,
1286  "Max EBML element depth (%d) reached, "
1287  "cannot parse further.\n", EBML_MAX_DEPTH);
1288  ret = AVERROR_INVALIDDATA;
1289  } else {
      /* The dummy level is recognisable by its length of (uint64_t)-1. */
1290  level.start = 0;
1291  level.length = (uint64_t)-1;
1292  matroska->levels[matroska->num_levels] = level;
1293  matroska->num_levels++;
1294  matroska->current_id = 0;
1295 
1296  ret = ebml_parse(matroska, matroska_segment, matroska);
1297 
1298  /* remove dummy level */
      /* Pops levels until the dummy itself has been popped (or none remain). */
1299  while (matroska->num_levels) {
1300  uint64_t length = matroska->levels[--matroska->num_levels].length;
1301  if (length == (uint64_t)-1)
1302  break;
1303  }
1304  }
1305  }
1306  /* seek back */
1307  avio_seek(matroska->ctx->pb, before_pos, SEEK_SET);
1308  matroska->level_up = level_up;
1309  matroska->current_id = saved_id;
1310 
1311  return ret;
1312 }
1313 
/*
 * NOTE(review): the function header (listing line 1314) is absent from this
 * extract; the header reader calls this as
 * matroska_execute_seekhead(matroska) and the body only uses bare `return;`.
 *
 * Visits the seek-head entries and parses the elements they reference.
 * Does nothing when the input is not seekable or AVFMT_FLAG_IGNIDX is set.
 * Entries whose position is not greater than the current read position are
 * skipped. Cues are not parsed here; cues_parsing_deferred is set instead.
 * The loop stops at the first entry whose parse reports a negative result.
 */
1315 {
1316  EbmlList *seekhead_list = &matroska->seekhead;
1317  int64_t before_pos = avio_tell(matroska->ctx->pb);
1318  int i;
1319 
1320  // we should not do any seeking in the streaming case
1321  if (!matroska->ctx->pb->seekable ||
1322  (matroska->ctx->flags & AVFMT_FLAG_IGNIDX))
1323  return;
1324 
1325  for (i = 0; i < seekhead_list->nb_elem; i++) {
      /* Re-read elem on every pass instead of caching it outside the loop. */
1326  MatroskaSeekhead *seekhead = seekhead_list->elem;
1327  if (seekhead[i].pos <= before_pos)
1328  continue;
1329 
1330  // defer cues parsing until we actually need cue data.
1331  if (seekhead[i].id == MATROSKA_ID_CUES) {
1332  matroska->cues_parsing_deferred = 1;
1333  continue;
1334  }
1335 
1336  if (matroska_parse_seekhead_entry(matroska, i) < 0)
1337  break;
1338  }
1339 }
1340 
/*
 * NOTE(review): listing lines 1341 (function header and opening brace),
 * 1345 (presumably the declaration of `index`) and 1372 (the tail of the
 * av_add_index_entry() call) are absent from this extract.
 *
 * Parses the cues element found through the seek head and turns every cue
 * position into an index entry on the matching track's stream.
 */
1342  EbmlList *seekhead_list = &matroska->seekhead;
1343  MatroskaSeekhead *seekhead = seekhead_list->elem;
1344  EbmlList *index_list;
1346  int index_scale = 1;
1347  int i, j;
1348 
1349  for (i = 0; i < seekhead_list->nb_elem; i++)
1350  if (seekhead[i].id == MATROSKA_ID_CUES)
1351  break;
      /* i equals nb_elem when no cues entry exists; the entry parser
       * returns 0 for an out-of-range index. */
1352  assert(i <= seekhead_list->nb_elem);
1353 
1354  matroska_parse_seekhead_entry(matroska, i);
1355 
1356  index_list = &matroska->index;
1357  index = index_list->elem;
      /* An implausibly large first cue time switches on a workaround that
       * divides all cue times by the time scale. */
1358  if (index_list->nb_elem
1359  && index[0].time > 1E14/matroska->time_scale) {
1360  av_log(matroska->ctx, AV_LOG_WARNING, "Working around broken index.\n");
1361  index_scale = matroska->time_scale;
1362  }
1363  for (i = 0; i < index_list->nb_elem; i++) {
1364  EbmlList *pos_list = &index[i].pos;
1365  MatroskaIndexPos *pos = pos_list->elem;
1366  for (j = 0; j < pos_list->nb_elem; j++) {
1367  MatroskaTrack *track = matroska_find_track_by_num(matroska, pos[j].track);
1368  if (track && track->stream)
1369  av_add_index_entry(track->stream,
1370  pos[j].pos + matroska->segment_start,
1371  index[i].time/index_scale, 0, 0,
1373  }
1374  }
1375 }
1376 
/*
 * Derive the 1-based AAC profile number from a codec ID string.
 *
 * The string is searched for the substrings "MAIN", "LC" and "SSR", in that
 * order, and the first one that occurs anywhere in the string wins.
 *
 * codec_id: NUL-terminated codec ID string; must not be NULL.
 *
 * Returns 1 for "MAIN", 2 for "LC", 3 for "SSR", and 4 when none of them
 * occurs in codec_id.
 *
 * The header line was absent from the listing this block came from; it is
 * restored here from the call matroska_aac_profile(track->codec_id).
 */
static int matroska_aac_profile(char *codec_id)
{
    static const char * const aac_profiles[] = { "MAIN", "LC", "SSR" };
    const int nb_profiles = (int)(sizeof(aac_profiles) / sizeof(aac_profiles[0]));
    int profile;

    for (profile = 0; profile < nb_profiles; profile++)
        if (strstr(codec_id, aac_profiles[profile]))
            break;

    /* Falls through with profile == nb_profiles when nothing matched. */
    return profile + 1;
}
1387 
1388 static int matroska_aac_sri(int samplerate)
1389 {
1390  int sri;
1391 
1392  for (sri=0; sri<FF_ARRAY_ELEMS(avpriv_mpeg4audio_sample_rates); sri++)
1393  if (avpriv_mpeg4audio_sample_rates[sri] == samplerate)
1394  break;
1395  return sri;
1396 }
1397 
/*
 * NOTE(review): the function header (listing line 1398) is absent from this
 * extract, as are a number of body lines (gaps in the embedded numbering at
 * 1464, 1505, 1508-1510, 1565, 1680, 1682, 1690, 1701, 1705, 1711-1714,
 * 1718, 1722, 1724, 1741-1742 and 1773). Read the code below with those
 * holes in mind; `codec_id`, for example, is used but its declaration is
 * not visible.
 *
 * Reads the container header: validates the EBML header, parses the
 * segment (resyncing until a stop element is reached), runs the seek head,
 * then creates one stream per usable track, one per complete attachment,
 * and the chapter list.
 *
 * Returns 0 on success or a negative AVERROR code.
 */
1399 {
1400  MatroskaDemuxContext *matroska = s->priv_data;
1401  EbmlList *attachements_list = &matroska->attachments;
1402  MatroskaAttachement *attachements;
1403  EbmlList *chapters_list = &matroska->chapters;
1404  MatroskaChapter *chapters;
1405  MatroskaTrack *tracks;
1406  uint64_t max_start = 0;
1407  int64_t pos;
1408  Ebml ebml = { 0 };
1409  AVStream *st;
1410  int i, j, res;
1411 
1412  matroska->ctx = s;
1413 
1414  /* First read the EBML header. */
1415  if (ebml_parse(matroska, ebml_syntax, &ebml) || !ebml.doctype) {
1416  av_log(matroska->ctx, AV_LOG_ERROR, "EBML header parsing failed\n");
1417  ebml_free(ebml_syntax, &ebml);
1418  return AVERROR_INVALIDDATA;
1419  }
1420  if (ebml.version > EBML_VERSION || ebml.max_size > sizeof(uint64_t)
1421  || ebml.id_length > sizeof(uint32_t) || ebml.doctype_version > 2) {
1422  av_log(matroska->ctx, AV_LOG_ERROR,
1423  "EBML header using unsupported features\n"
1424  "(EBML version %"PRIu64", doctype %s, doc version %"PRIu64")\n",
1425  ebml.version, ebml.doctype, ebml.doctype_version);
1426  ebml_free(ebml_syntax, &ebml);
1427  return AVERROR_PATCHWELCOME;
1428  }
      /* An unknown doctype is only fatal when AV_EF_EXPLODE is requested. */
1429  for (i = 0; i < FF_ARRAY_ELEMS(matroska_doctypes); i++)
1430  if (!strcmp(ebml.doctype, matroska_doctypes[i]))
1431  break;
1432  if (i >= FF_ARRAY_ELEMS(matroska_doctypes)) {
1433  av_log(s, AV_LOG_WARNING, "Unknown EBML doctype '%s'\n", ebml.doctype);
1434  if (matroska->ctx->error_recognition & AV_EF_EXPLODE) {
1435  ebml_free(ebml_syntax, &ebml);
1436  return AVERROR_INVALIDDATA;
1437  }
1438  }
1439  ebml_free(ebml_syntax, &ebml);
1440 
1441  /* The next thing is a segment. */
1442  pos = avio_tell(matroska->ctx->pb);
1443  res = ebml_parse(matroska, matroska_segments, matroska);
1444  // try resyncing until we find a EBML_STOP type element.
1445  while (res != 1) {
1446  res = matroska_resync(matroska, pos);
1447  if (res < 0)
1448  return res;
1449  pos = avio_tell(matroska->ctx->pb);
1450  res = ebml_parse(matroska, matroska_segment, matroska);
1451  }
1452  matroska_execute_seekhead(matroska);
1453 
      /* Fall back to a time scale of 1000000 when none was parsed. */
1454  if (!matroska->time_scale)
1455  matroska->time_scale = 1000000;
1456  if (matroska->duration)
1457  matroska->ctx->duration = matroska->duration * matroska->time_scale
1458  * 1000 / AV_TIME_BASE;
1459  av_dict_set(&s->metadata, "title", matroska->title, 0);
1460 
1461  tracks = matroska->tracks.elem;
1462  for (i=0; i < matroska->tracks.nb_elem; i++) {
1463  MatroskaTrack *track = &tracks[i];
1465  EbmlList *encodings_list = &track->encodings;
1466  MatroskaTrackEncoding *encodings = encodings_list->elem;
1467  uint8_t *extradata = NULL;
1468  int extradata_size = 0;
1469  int extradata_offset = 0;
1470  AVIOContext b;
1471 
1472  /* Apply some sanity checks. */
1473  if (track->type != MATROSKA_TRACK_TYPE_VIDEO &&
1474  track->type != MATROSKA_TRACK_TYPE_AUDIO &&
1475  track->type != MATROSKA_TRACK_TYPE_SUBTITLE) {
1476  av_log(matroska->ctx, AV_LOG_INFO,
1477  "Unknown or unsupported track type %"PRIu64"\n",
1478  track->type);
1479  continue;
1480  }
1481  if (track->codec_id == NULL)
1482  continue;
1483 
      /* Fill in per-type defaults for values the file left at zero. */
1484  if (track->type == MATROSKA_TRACK_TYPE_VIDEO) {
1485  if (!track->default_duration && track->video.frame_rate > 0)
1486  track->default_duration = 1000000000/track->video.frame_rate;
1487  if (!track->video.display_width)
1488  track->video.display_width = track->video.pixel_width;
1489  if (!track->video.display_height)
1490  track->video.display_height = track->video.pixel_height;
1491  } else if (track->type == MATROSKA_TRACK_TYPE_AUDIO) {
1492  if (!track->audio.out_samplerate)
1493  track->audio.out_samplerate = track->audio.samplerate;
1494  }
      /* NOTE(review): the two AV_LOG_ERROR messages in the encodings check
       * below do not end in a newline, unlike the other log calls in this
       * function. */
1495  if (encodings_list->nb_elem > 1) {
1496  av_log(matroska->ctx, AV_LOG_ERROR,
1497  "Multiple combined encodings not supported");
1498  } else if (encodings_list->nb_elem == 1) {
1499  if (encodings[0].type ||
1500  (
1501 #if CONFIG_ZLIB
1502  encodings[0].compression.algo != MATROSKA_TRACK_ENCODING_COMP_ZLIB &&
1503 #endif
1504 #if CONFIG_BZLIB
1506 #endif
1507 #if CONFIG_LZO
1509 #endif
1511  encodings[0].scope = 0;
1512  av_log(matroska->ctx, AV_LOG_ERROR,
1513  "Unsupported encoding type");
1514  } else if (track->codec_priv.size && encodings[0].scope&2) {
      /* Scope bit 2 set: the codec private data itself is encoded. */
1515  uint8_t *codec_priv = track->codec_priv.data;
1516  int ret = matroska_decode_buffer(&track->codec_priv.data,
1517  &track->codec_priv.size,
1518  track);
1519  if (ret < 0) {
1520  track->codec_priv.data = NULL;
1521  track->codec_priv.size = 0;
1522  av_log(matroska->ctx, AV_LOG_ERROR,
1523  "Failed to decode codec private data\n");
1524  }
1525 
      /* Free the old buffer once the pointer no longer refers to it. */
1526  if (codec_priv != track->codec_priv.data)
1527  av_free(codec_priv);
1528  }
1529  }
1530 
      /* Prefix-match the track's codec ID string against the tag table. */
1531  for(j=0; ff_mkv_codec_tags[j].id != AV_CODEC_ID_NONE; j++){
1532  if(!strncmp(ff_mkv_codec_tags[j].str, track->codec_id,
1533  strlen(ff_mkv_codec_tags[j].str))){
1534  codec_id= ff_mkv_codec_tags[j].id;
1535  break;
1536  }
1537  }
1538 
1539  st = track->stream = avformat_new_stream(s, NULL);
1540  if (st == NULL)
1541  return AVERROR(ENOMEM);
1542 
      /* Codec-specific fix-ups: refine codec_id, build synthetic extradata,
       * or set the offset at which extradata starts inside codec_priv. */
1543  if (!strcmp(track->codec_id, "V_MS/VFW/FOURCC")
1544  && track->codec_priv.size >= 40
1545  && track->codec_priv.data != NULL) {
1546  track->ms_compat = 1;
1547  track->video.fourcc = AV_RL32(track->codec_priv.data + 16);
1548  codec_id = ff_codec_get_id(ff_codec_bmp_tags, track->video.fourcc);
1549  extradata_offset = 40;
1550  } else if (!strcmp(track->codec_id, "A_MS/ACM")
1551  && track->codec_priv.size >= 14
1552  && track->codec_priv.data != NULL) {
1553  int ret;
1554  ffio_init_context(&b, track->codec_priv.data, track->codec_priv.size,
1555  0, NULL, NULL, NULL, NULL);
1556  ret = ff_get_wav_header(&b, st->codec, track->codec_priv.size);
1557  if (ret < 0)
1558  return ret;
1559  codec_id = st->codec->codec_id;
1560  extradata_offset = FFMIN(track->codec_priv.size, 18);
1561  } else if (!strcmp(track->codec_id, "V_QUICKTIME")
1562  && (track->codec_priv.size >= 86)
1563  && (track->codec_priv.data != NULL)) {
1564  track->video.fourcc = AV_RL32(track->codec_priv.data);
1566  } else if (codec_id == AV_CODEC_ID_PCM_S16BE) {
1567  switch (track->audio.bitdepth) {
1568  case 8: codec_id = AV_CODEC_ID_PCM_U8; break;
1569  case 24: codec_id = AV_CODEC_ID_PCM_S24BE; break;
1570  case 32: codec_id = AV_CODEC_ID_PCM_S32BE; break;
1571  }
1572  } else if (codec_id == AV_CODEC_ID_PCM_S16LE) {
1573  switch (track->audio.bitdepth) {
1574  case 8: codec_id = AV_CODEC_ID_PCM_U8; break;
1575  case 24: codec_id = AV_CODEC_ID_PCM_S24LE; break;
1576  case 32: codec_id = AV_CODEC_ID_PCM_S32LE; break;
1577  }
1578  } else if (codec_id==AV_CODEC_ID_PCM_F32LE && track->audio.bitdepth==64) {
1579  codec_id = AV_CODEC_ID_PCM_F64LE;
1580  } else if (codec_id == AV_CODEC_ID_AAC && !track->codec_priv.size) {
      /* No codec private data: synthesise 2 bytes of AAC extradata, or 5
       * when the codec ID string contains "SBR". */
1581  int profile = matroska_aac_profile(track->codec_id);
1582  int sri = matroska_aac_sri(track->audio.samplerate);
1583  extradata = av_mallocz(5 + FF_INPUT_BUFFER_PADDING_SIZE);
1584  if (extradata == NULL)
1585  return AVERROR(ENOMEM);
1586  extradata[0] = (profile << 3) | ((sri&0x0E) >> 1);
1587  extradata[1] = ((sri&0x01) << 7) | (track->audio.channels<<3);
1588  if (strstr(track->codec_id, "SBR")) {
1589  sri = matroska_aac_sri(track->audio.out_samplerate);
1590  extradata[2] = 0x56;
1591  extradata[3] = 0xE5;
1592  extradata[4] = 0x80 | (sri<<3);
1593  extradata_size = 5;
1594  } else
1595  extradata_size = 2;
1596  } else if (codec_id == AV_CODEC_ID_ALAC && track->codec_priv.size) {
1597  /* Only ALAC's magic cookie is stored in Matroska's track headers.
1598  Create the "atom size", "tag", and "tag version" fields the
1599  decoder expects manually. */
1600  extradata_size = 12 + track->codec_priv.size;
1601  extradata = av_mallocz(extradata_size + FF_INPUT_BUFFER_PADDING_SIZE);
1602  if (extradata == NULL)
1603  return AVERROR(ENOMEM);
1604  AV_WB32(extradata, extradata_size);
1605  memcpy(&extradata[4], "alac", 4);
1606  AV_WB32(&extradata[8], 0);
1607  memcpy(&extradata[12], track->codec_priv.data,
1608  track->codec_priv.size);
1609  } else if (codec_id == AV_CODEC_ID_TTA) {
      /* NOTE(review): unlike the AAC and ALAC branches above, this buffer
       * is allocated without FF_INPUT_BUFFER_PADDING_SIZE -- confirm that
       * is intended. */
1610  extradata_size = 30;
1611  extradata = av_mallocz(extradata_size);
1612  if (extradata == NULL)
1613  return AVERROR(ENOMEM);
1614  ffio_init_context(&b, extradata, extradata_size, 1,
1615  NULL, NULL, NULL, NULL);
1616  avio_write(&b, "TTA1", 4);
1617  avio_wl16(&b, 1);
1618  avio_wl16(&b, track->audio.channels);
1619  avio_wl16(&b, track->audio.bitdepth);
1620  avio_wl32(&b, track->audio.out_samplerate);
1621  avio_wl32(&b, matroska->ctx->duration * track->audio.out_samplerate);
1622  } else if (codec_id == AV_CODEC_ID_RV10 || codec_id == AV_CODEC_ID_RV20 ||
1623  codec_id == AV_CODEC_ID_RV30 || codec_id == AV_CODEC_ID_RV40) {
1624  extradata_offset = 26;
1625  } else if (codec_id == AV_CODEC_ID_RA_144) {
1626  track->audio.out_samplerate = 8000;
1627  track->audio.channels = 1;
1628  } else if (codec_id == AV_CODEC_ID_RA_288 || codec_id == AV_CODEC_ID_COOK ||
1629  codec_id == AV_CODEC_ID_ATRAC3 || codec_id == AV_CODEC_ID_SIPR) {
      /* Pull the interleaving parameters out of the codec private data. */
1630  int flavor;
1631  ffio_init_context(&b, track->codec_priv.data,track->codec_priv.size,
1632  0, NULL, NULL, NULL, NULL);
1633  avio_skip(&b, 22);
1634  flavor = avio_rb16(&b);
1635  track->audio.coded_framesize = avio_rb32(&b);
1636  avio_skip(&b, 12);
1637  track->audio.sub_packet_h = avio_rb16(&b);
1638  track->audio.frame_size = avio_rb16(&b);
1639  track->audio.sub_packet_size = avio_rb16(&b);
1640  if (flavor <= 0 || track->audio.coded_framesize <= 0 ||
1641  track->audio.sub_packet_h <= 0 || track->audio.frame_size <= 0 ||
1642  track->audio.sub_packet_size <= 0)
1643  return AVERROR_INVALIDDATA;
      /* NOTE(review): the result of this av_malloc() is not checked here;
       * the RM-style audio parser later copies into track->audio.buf. */
1644  track->audio.buf = av_malloc(track->audio.frame_size * track->audio.sub_packet_h);
1645  if (codec_id == AV_CODEC_ID_RA_288) {
1646  st->codec->block_align = track->audio.coded_framesize;
1647  track->codec_priv.size = 0;
1648  } else {
1649  if (codec_id == AV_CODEC_ID_SIPR && flavor < 4) {
1650  const int sipr_bit_rate[4] = { 6504, 8496, 5000, 16000 };
1651  track->audio.sub_packet_size = ff_sipr_subpk_size[flavor];
1652  st->codec->bit_rate = sipr_bit_rate[flavor];
1653  }
1654  st->codec->block_align = track->audio.sub_packet_size;
1655  extradata_offset = 78;
1656  }
1657  }
      /* NOTE(review): some branches set extradata_offset without checking
       * codec_priv.size, so size can become negative here; the copy further
       * down only runs when size > 0. */
1658  track->codec_priv.size -= extradata_offset;
1659 
1660  if (codec_id == AV_CODEC_ID_NONE)
1661  av_log(matroska->ctx, AV_LOG_INFO,
1662  "Unknown/unsupported AVCodecID %s.\n", track->codec_id);
1663 
      /* Track time scales below 0.01 are replaced by 1.0. */
1664  if (track->time_scale < 0.01)
1665  track->time_scale = 1.0;
1666  avpriv_set_pts_info(st, 64, matroska->time_scale*track->time_scale, 1000*1000*1000); /* 64 bit pts in ns */
1667 
1668  /* convert the delay from ns to the track timebase */
1669  track->codec_delay = av_rescale_q(track->codec_delay,
1670  (AVRational){ 1, 1000000000 },
1671  st->time_base);
1672 
1673  st->codec->codec_id = codec_id;
1674  st->start_time = 0;
1675  if (strcmp(track->language, "und"))
1676  av_dict_set(&st->metadata, "language", track->language, 0);
1677  av_dict_set(&st->metadata, "title", track->name, 0);
1678 
1679  if (track->flag_default)
1681  if (track->flag_forced)
1683 
      /* Synthetic extradata wins; otherwise copy codec_priv from the offset. */
1684  if (!st->codec->extradata) {
1685  if(extradata){
1686  st->codec->extradata = extradata;
1687  st->codec->extradata_size = extradata_size;
1688  } else if(track->codec_priv.data && track->codec_priv.size > 0){
1689  st->codec->extradata = av_mallocz(track->codec_priv.size +
1691  if(st->codec->extradata == NULL)
1692  return AVERROR(ENOMEM);
1693  st->codec->extradata_size = track->codec_priv.size;
1694  memcpy(st->codec->extradata,
1695  track->codec_priv.data + extradata_offset,
1696  track->codec_priv.size);
1697  }
1698  }
1699 
1700  if (track->type == MATROSKA_TRACK_TYPE_VIDEO) {
1702  st->codec->codec_tag = track->video.fourcc;
1703  st->codec->width = track->video.pixel_width;
1704  st->codec->height = track->video.pixel_height;
1706  &st->sample_aspect_ratio.den,
1707  st->codec->height * track->video.display_width,
1708  st->codec-> width * track->video.display_height,
1709  255);
1710  if (st->codec->codec_id != AV_CODEC_ID_H264 &&
1713  if (track->default_duration) {
1715  1000000000, track->default_duration, 30000);
1716  }
1717  } else if (track->type == MATROSKA_TRACK_TYPE_AUDIO) {
1719  st->codec->sample_rate = track->audio.out_samplerate;
1720  st->codec->channels = track->audio.channels;
1721  if (st->codec->codec_id != AV_CODEC_ID_AAC)
1723  } else if (track->type == MATROSKA_TRACK_TYPE_SUBTITLE) {
1725  if (st->codec->codec_id == AV_CODEC_ID_SSA)
1726  matroska->contains_ssa = 1;
1727  }
1728  }
1729 
      /* One extra stream per attachment that has a name, MIME type and data. */
1730  attachements = attachements_list->elem;
1731  for (j=0; j<attachements_list->nb_elem; j++) {
1732  if (!(attachements[j].filename && attachements[j].mime &&
1733  attachements[j].bin.data && attachements[j].bin.size > 0)) {
1734  av_log(matroska->ctx, AV_LOG_ERROR, "incomplete attachment\n");
1735  } else {
1736  AVStream *st = avformat_new_stream(s, NULL);
1737  if (st == NULL)
1738  break;
1739  av_dict_set(&st->metadata, "filename",attachements[j].filename, 0);
1740  av_dict_set(&st->metadata, "mimetype", attachements[j].mime, 0);
      /* NOTE(review): this extradata buffer is allocated with the exact
       * payload size, without FF_INPUT_BUFFER_PADDING_SIZE, unlike the
       * track extradata above -- confirm that is intended. */
1743  st->codec->extradata = av_malloc(attachements[j].bin.size);
1744  if(st->codec->extradata == NULL)
1745  break;
1746  st->codec->extradata_size = attachements[j].bin.size;
1747  memcpy(st->codec->extradata, attachements[j].bin.data, attachements[j].bin.size);
1748 
1749  for (i=0; ff_mkv_mime_tags[i].id != AV_CODEC_ID_NONE; i++) {
1750  if (!strncmp(ff_mkv_mime_tags[i].str, attachements[j].mime,
1751  strlen(ff_mkv_mime_tags[i].str))) {
1752  st->codec->codec_id = ff_mkv_mime_tags[i].id;
1753  break;
1754  }
1755  }
1756  attachements[j].stream = st;
1757  }
1758  }
1759 
      /* Only chapters with a UID and a start time are registered, and each
       * one must start later than the last accepted chapter. */
1760  chapters = chapters_list->elem;
1761  for (i=0; i<chapters_list->nb_elem; i++)
1762  if (chapters[i].start != AV_NOPTS_VALUE && chapters[i].uid
1763  && (max_start==0 || chapters[i].start > max_start)) {
      /* NOTE(review): the pointer returned by avpriv_new_chapter() is
       * dereferenced right below without a NULL check -- confirm it
       * cannot fail. */
1764  chapters[i].chapter =
1765  avpriv_new_chapter(s, chapters[i].uid, (AVRational){1, 1000000000},
1766  chapters[i].start, chapters[i].end,
1767  chapters[i].title);
1768  av_dict_set(&chapters[i].chapter->metadata,
1769  "title", chapters[i].title, 0);
1770  max_start = chapters[i].start;
1771  }
1772 
1774 
1775  return 0;
1776 }
1777 
/*
 * NOTE(review): the first header line (listing line 1782) is absent from
 * this extract; the body also uses a demuxer context named `matroska`.
 *
 * Ownership: the queued AVPacket struct is copied by value into *pkt and
 * the queue's own struct is freed, so the caller takes over the packet
 * contents. When the last packet is removed the queue array is freed and
 * prev_pkt is cleared. A failed shrink of the array is tolerated: the old,
 * larger array simply stays in use.
 */
1778 /*
1779  * Put one packet in an application-supplied AVPacket struct.
1780  * Returns 0 on success or -1 on failure.
1781  */
1783  AVPacket *pkt)
1784 {
1785  if (matroska->num_packets > 0) {
1786  memcpy(pkt, matroska->packets[0], sizeof(AVPacket));
1787  av_free(matroska->packets[0]);
1788  if (matroska->num_packets > 1) {
1789  void *newpackets;
      /* Shift the remaining pointers down by one slot, then try to shrink. */
1790  memmove(&matroska->packets[0], &matroska->packets[1],
1791  (matroska->num_packets - 1) * sizeof(AVPacket *));
1792  newpackets = av_realloc(matroska->packets,
1793  (matroska->num_packets - 1) * sizeof(AVPacket *));
1794  if (newpackets)
1795  matroska->packets = newpackets;
1796  } else {
1797  av_freep(&matroska->packets);
1798  matroska->prev_pkt = NULL;
1799  }
1800  matroska->num_packets--;
1801  return 0;
1802  }
1803 
1804  return -1;
1805 }
1806 
/*
 * NOTE(review): the function header (listing line 1810) is absent from this
 * extract; the body operates on a demuxer context named `matroska`.
 *
 * prev_pkt is always cleared. When a queue array exists, every queued
 * packet has its contents released and its struct freed, then the array is
 * freed and the packet count is reset to 0.
 */
1807 /*
1808  * Free all packets in our internal queue.
1809  */
1811 {
1812  matroska->prev_pkt = NULL;
1813  if (matroska->packets) {
1814  int n;
1815  for (n = 0; n < matroska->num_packets; n++) {
1816  av_free_packet(matroska->packets[n]);
1817  av_free(matroska->packets[n]);
1818  }
1819  av_freep(&matroska->packets);
1820  matroska->num_packets = 0;
1821  }
1822 }
1823 
/*
 * NOTE(review): the first header line (listing line 1824) is absent from
 * this extract. The block parser calls this as
 * matroska_parse_laces(matroska, &data, &size, type, &lace_size, &laces).
 *
 * Decodes the lacing header at the start of a block payload.
 *
 *   buf / buf_size  in: payload start and length; out: advanced past the
 *                   lacing header (left untouched when type is 0)
 *   type            0 = no lacing, 1 = Xiph, 2 = fixed-size, 3 = EBML
 *   lace_buf        out: newly allocated array holding one size per lace
 *   laces           out: number of laces
 *
 * Returns 0 on success or a negative AVERROR code. On the error paths
 * inside the switch, *lace_buf is still set to the allocated array, and the
 * visible caller frees it in every case. On the two ENOMEM returns
 * *lace_buf is not set to a valid array.
 */
1825  int* buf_size, int type,
1826  uint32_t **lace_buf, int *laces)
1827 {
1828  int res = 0, n, size = *buf_size;
1829  uint8_t *data = *buf;
1830  uint32_t *lace_size;
1831 
1832  if (!type) {
1833  *laces = 1;
1834  *lace_buf = av_mallocz(sizeof(int));
1835  if (!*lace_buf)
1836  return AVERROR(ENOMEM);
1837 
      /* Parses as *(lace_buf[0]), i.e. element 0 of the new array. */
1838  *lace_buf[0] = size;
1839  return 0;
1840  }
1841 
      /* The first payload byte stores the lace count minus one. */
1842  assert(size > 0);
1843  *laces = *data + 1;
1844  data += 1;
1845  size -= 1;
1846  lace_size = av_mallocz(*laces * sizeof(int));
1847  if (!lace_size)
1848  return AVERROR(ENOMEM);
1849 
1850  switch (type) {
1851  case 0x1: /* Xiph lacing */ {
1852  uint8_t temp;
1853  uint32_t total = 0;
      /* Each size but the last is a run of bytes summed up to and
       * including the first byte that is not 0xff. */
1854  for (n = 0; res == 0 && n < *laces - 1; n++) {
1855  while (1) {
1856  if (size == 0) {
1857  res = AVERROR_EOF;
1858  break;
1859  }
1860  temp = *data;
1861  lace_size[n] += temp;
1862  data += 1;
1863  size -= 1;
1864  if (temp != 0xff)
1865  break;
1866  }
1867  total += lace_size[n];
1868  }
1869  if (size <= total) {
1870  res = AVERROR_INVALIDDATA;
1871  break;
1872  }
1873 
      /* The last lace takes whatever remains of the payload. */
1874  lace_size[n] = size - total;
1875  break;
1876  }
1877 
1878  case 0x2: /* fixed-size lacing */
1879  if (size % (*laces)) {
1880  res = AVERROR_INVALIDDATA;
1881  break;
1882  }
1883  for (n = 0; n < *laces; n++)
1884  lace_size[n] = size / *laces;
1885  break;
1886 
1887  case 0x3: /* EBML lacing */ {
1888  uint64_t num;
1889  uint64_t total;
      /* First size is an unsigned EBML number; each following size is the
       * previous size plus a signed EBML delta. */
1890  n = matroska_ebmlnum_uint(matroska, data, size, &num);
1891  if (n < 0) {
1892  av_log(matroska->ctx, AV_LOG_INFO,
1893  "EBML block data error\n");
1894  res = n;
1895  break;
1896  }
1897  data += n;
1898  size -= n;
1899  total = lace_size[0] = num;
1900  for (n = 1; res == 0 && n < *laces - 1; n++) {
1901  int64_t snum;
1902  int r;
1903  r = matroska_ebmlnum_sint(matroska, data, size, &snum);
1904  if (r < 0) {
1905  av_log(matroska->ctx, AV_LOG_INFO,
1906  "EBML block data error\n");
1907  res = r;
1908  break;
1909  }
1910  data += r;
1911  size -= r;
1912  lace_size[n] = lace_size[n - 1] + snum;
1913  total += lace_size[n];
1914  }
1915  if (size <= total) {
1916  res = AVERROR_INVALIDDATA;
1917  break;
1918  }
1919  lace_size[*laces - 1] = size - total;
1920  break;
1921  }
1922  }
1923 
1924  *buf = data;
1925  *lace_buf = lace_size;
1926  *buf_size = size;
1927 
1928  return res;
1929 }
1930 
/*
 * NOTE(review): the first header line (listing line 1931) and one statement
 * inside the output loop (listing line 1989) are absent from this extract.
 * The block parser calls this as matroska_parse_rm_audio(matroska, track,
 * st, data, size, timecode, duration, pos).
 *
 * Collects one sub-packet of interleaved audio into track->audio.buf.
 * Once sub_packet_h sub-packets have been gathered, the buffer is cut into
 * packets of block_align bytes that are appended to the demuxer's queue.
 *
 * Returns 0 on success, AVERROR_INVALIDDATA when `size` is too small for
 * the codec's layout.
 */
1932  MatroskaTrack *track,
1933  AVStream *st,
1934  uint8_t *data, int size,
1935  uint64_t timecode, uint64_t duration,
1936  int64_t pos)
1937 {
1938  int a = st->codec->block_align;
1939  int sps = track->audio.sub_packet_size;
1940  int cfs = track->audio.coded_framesize;
1941  int h = track->audio.sub_packet_h;
1942  int y = track->audio.sub_packet_cnt;
1943  int w = track->audio.frame_size;
1944  int x;
1945 
      /* Only accept new input while no assembled packets are pending. */
1946  if (!track->audio.pkt_cnt) {
      /* Remember the timecode of the first sub-packet of a group. */
1947  if (track->audio.sub_packet_cnt == 0)
1948  track->audio.buf_timecode = timecode;
1949  if (st->codec->codec_id == AV_CODEC_ID_RA_288) {
1950  if (size < cfs * h / 2) {
1951  av_log(matroska->ctx, AV_LOG_ERROR,
1952  "Corrupt int4 RM-style audio packet size\n");
1953  return AVERROR_INVALIDDATA;
1954  }
1955  for (x=0; x<h/2; x++)
1956  memcpy(track->audio.buf+x*2*w+y*cfs,
1957  data+x*cfs, cfs);
1958  } else if (st->codec->codec_id == AV_CODEC_ID_SIPR) {
1959  if (size < w) {
1960  av_log(matroska->ctx, AV_LOG_ERROR,
1961  "Corrupt sipr RM-style audio packet size\n");
1962  return AVERROR_INVALIDDATA;
1963  }
1964  memcpy(track->audio.buf + y*w, data, w);
1965  } else {
1966  if (size < sps * w / sps) {
1967  av_log(matroska->ctx, AV_LOG_ERROR,
1968  "Corrupt generic RM-style audio packet size\n");
1969  return AVERROR_INVALIDDATA;
1970  }
1971  for (x=0; x<w/sps; x++)
1972  memcpy(track->audio.buf+sps*(h*x+((h+1)/2)*(y&1)+(y>>1)), data+x*sps, sps);
1973  }
1974 
      /* A full group is complete: schedule h*w/a output packets. */
1975  if (++track->audio.sub_packet_cnt >= h) {
1976  if (st->codec->codec_id == AV_CODEC_ID_SIPR)
1977  ff_rm_reorder_sipr_data(track->audio.buf, h, w);
1978  track->audio.sub_packet_cnt = 0;
1979  track->audio.pkt_cnt = h*w / a;
1980  }
1981  }
1982 
1983  while (track->audio.pkt_cnt) {
      /* NOTE(review): neither the av_mallocz() result nor the
       * av_new_packet() return value is checked before pkt and pkt->data
       * are used below. */
1984  AVPacket *pkt = av_mallocz(sizeof(AVPacket));
1985  av_new_packet(pkt, a);
1986  memcpy(pkt->data, track->audio.buf
1987  + a * (h*w / a - track->audio.pkt_cnt--), a);
1988  pkt->pts = track->audio.buf_timecode;
1990  pkt->pos = pos;
1991  pkt->stream_index = st->index;
1992  dynarray_add(&matroska->packets,&matroska->num_packets,pkt);
1993  }
1994 
1995  return 0;
1996 }
1997 
/*
 * NOTE(review): the first header line (listing line 1999) is absent from
 * this extract. The frame parser calls this as
 * matroska_parse_wavpack(track, pkt_data, &wv_data, &pkt_size).
 *
 * Input layout as read below: a 32-bit little-endian sample count, then one
 * or more blocks made of 32-bit flags, a 32-bit CRC, an optional 32-bit
 * block size, and the block data. The explicit size is present unless both
 * bits of mask 0x1800 are set in the flags; without it the block extends to
 * the end of the input.
 *
 * Output: each block is written to a newly allocated buffer, preceded by a
 * 32-byte header built from the stream's version (first two bytes of the
 * codec extradata), the sample count, the flags and the CRC.
 *
 * On success returns 0 with *pdst set to the new buffer and *size set to
 * its length. On failure returns a negative AVERROR code, the partial
 * buffer is freed, and *pdst / *size are left untouched.
 */
1998 /* reconstruct full wavpack blocks from mangled matroska ones */
2000  uint8_t **pdst, int *size)
2001 {
2002  uint8_t *dst = NULL;
2003  int dstlen = 0;
2004  int srclen = *size;
2005  uint32_t samples;
2006  uint16_t ver;
2007  int ret, offset = 0;
2008 
2009  if (srclen < 12 || track->stream->codec->extradata_size < 2)
2010  return AVERROR_INVALIDDATA;
2011 
2012  ver = AV_RL16(track->stream->codec->extradata);
2013 
2014  samples = AV_RL32(src);
2015  src += 4;
2016  srclen -= 4;
2017 
2018  while (srclen >= 8) {
2019  int multiblock;
2020  uint32_t blocksize;
2021  uint8_t *tmp;
2022 
2023  uint32_t flags = AV_RL32(src);
2024  uint32_t crc = AV_RL32(src + 4);
2025  src += 8;
2026  srclen -= 8;
2027 
2028  multiblock = (flags & 0x1800) != 0x1800;
2029  if (multiblock) {
2030  if (srclen < 4) {
2031  ret = AVERROR_INVALIDDATA;
2032  goto fail;
2033  }
2034  blocksize = AV_RL32(src);
2035  src += 4;
2036  srclen -= 4;
2037  } else
2038  blocksize = srclen;
2039 
2040  if (blocksize > srclen) {
2041  ret = AVERROR_INVALIDDATA;
2042  goto fail;
2043  }
2044 
      /* Grow through a temporary so dst stays valid if realloc fails. */
2045  tmp = av_realloc(dst, dstlen + blocksize + 32);
2046  if (!tmp) {
2047  ret = AVERROR(ENOMEM);
2048  goto fail;
2049  }
2050  dst = tmp;
2051  dstlen += blocksize + 32;
2052 
2053  AV_WL32(dst + offset, MKTAG('w', 'v', 'p', 'k')); // tag
2054  AV_WL32(dst + offset + 4, blocksize + 24); // blocksize - 8
2055  AV_WL16(dst + offset + 8, ver); // version
2056  AV_WL16(dst + offset + 10, 0); // track/index_no
2057  AV_WL32(dst + offset + 12, 0); // total samples
2058  AV_WL32(dst + offset + 16, 0); // block index
2059  AV_WL32(dst + offset + 20, samples); // number of samples
2060  AV_WL32(dst + offset + 24, flags); // flags
2061  AV_WL32(dst + offset + 28, crc); // crc
2062  memcpy (dst + offset + 32, src, blocksize); // block data
2063 
2064  src += blocksize;
2065  srclen -= blocksize;
2066  offset += blocksize + 32;
2067  }
2068 
2069  *pdst = dst;
2070  *size = dstlen;
2071 
2072  return 0;
2073 
2074 fail:
2075  av_freep(&dst);
2076  return ret;
2077 }
2078 
/*
 * NOTE(review): the first header line (listing line 2079) and the statement
 * belonging to the AV_CODEC_ID_TEXT test (listing line 2139) are absent
 * from this extract. The block parser calls this as
 * matroska_parse_frame(matroska, track, st, data, size, timecode,
 * duration, pos, is_keyframe).
 *
 * Turns one frame payload into an AVPacket and queues it. The payload is
 * decoded first when the track's first encoding has scope bit 1 set,
 * WavPack data is rebuilt into full blocks, and ProRes data gets an 8-byte
 * size + 'icpf' prefix. An SSA packet with the same timecode and stream as
 * the previously queued packet is merged into that packet instead of being
 * queued.
 *
 * Returns 0 on success or a negative AVERROR code.
 */
2080  MatroskaTrack *track,
2081  AVStream *st,
2082  uint8_t *data, int pkt_size,
2083  uint64_t timecode, uint64_t duration,
2084  int64_t pos, int is_keyframe)
2085 {
2086  MatroskaTrackEncoding *encodings = track->encodings.elem;
2087  uint8_t *pkt_data = data;
2088  int offset = 0, res;
2089  AVPacket *pkt;
2090 
      /* From here on, pkt_data != data means pkt_data is a buffer that
       * must be freed by this function. */
2091  if (encodings && encodings->scope & 1) {
2092  res = matroska_decode_buffer(&pkt_data, &pkt_size, track);
2093  if (res < 0)
2094  return res;
2095  }
2096 
2097  if (st->codec->codec_id == AV_CODEC_ID_WAVPACK) {
2098  uint8_t *wv_data;
2099  res = matroska_parse_wavpack(track, pkt_data, &wv_data, &pkt_size);
2100  if (res < 0) {
2101  av_log(matroska->ctx, AV_LOG_ERROR, "Error parsing a wavpack block.\n");
2102  goto fail;
2103  }
2104  if (pkt_data != data)
2105  av_freep(&pkt_data);
2106  pkt_data = wv_data;
2107  }
2108 
2109  if (st->codec->codec_id == AV_CODEC_ID_PRORES)
2110  offset = 8;
2111 
      /* NOTE(review): the av_mallocz() result is passed to av_new_packet()
       * without a NULL check. The early return below also bypasses the
       * `fail` label, so a decoded pkt_data (pkt_data != data) is not
       * freed on that path. */
2112  pkt = av_mallocz(sizeof(AVPacket));
2113  /* XXX: prevent data copy... */
2114  if (av_new_packet(pkt, pkt_size + offset) < 0) {
2115  av_free(pkt);
2116  return AVERROR(ENOMEM);
2117  }
2118 
2119  if (st->codec->codec_id == AV_CODEC_ID_PRORES) {
2120  uint8_t *buf = pkt->data;
2121  bytestream_put_be32(&buf, pkt_size);
2122  bytestream_put_be32(&buf, MKBETAG('i', 'c', 'p', 'f'));
2123  }
2124 
2125  memcpy(pkt->data + offset, pkt_data, pkt_size);
2126 
2127  if (pkt_data != data)
2128  av_free(pkt_data);
2129 
2130  pkt->flags = is_keyframe;
2131  pkt->stream_index = st->index;
2132 
      /* ms_compat tracks carry the timecode as dts, all others as pts. */
2133  if (track->ms_compat)
2134  pkt->dts = timecode;
2135  else
2136  pkt->pts = timecode;
2137  pkt->pos = pos;
2138  if (st->codec->codec_id == AV_CODEC_ID_TEXT)
2140  else if (track->type != MATROSKA_TRACK_TYPE_SUBTITLE)
2141  pkt->duration = duration;
2142 
2143  if (st->codec->codec_id == AV_CODEC_ID_SSA)
2144  matroska_fix_ass_packet(matroska, pkt, duration);
2145 
      /* NOTE(review): the return value of matroska_merge_packets() is
       * ignored here. */
2146  if (matroska->prev_pkt &&
2147  timecode != AV_NOPTS_VALUE &&
2148  matroska->prev_pkt->pts == timecode &&
2149  matroska->prev_pkt->stream_index == st->index &&
2150  st->codec->codec_id == AV_CODEC_ID_SSA)
2151  matroska_merge_packets(matroska->prev_pkt, pkt);
2152  else {
2153  dynarray_add(&matroska->packets,&matroska->num_packets,pkt);
2154  matroska->prev_pkt = pkt;
2155  }
2156 
2157  return 0;
2158 fail:
2159  if (pkt_data != data)
2160  av_freep(&pkt_data);
2161  return res;
2162 }
2163 
/*
 * NOTE(review): the first header line (listing line 2164) is absent from
 * this extract; the body also uses `matroska` and `data`, which must be
 * declared on that line.
 *
 * Parses one block payload: an EBML-coded track number, a signed 16-bit
 * time offset relative to the cluster, a flags byte, and the (possibly
 * laced) frame data. Each lace is handed to the RM-style audio parser or
 * to the generic frame parser.
 *
 *   is_keyframe     -1 means "take it from bit 0x80 of the flags byte"
 *   cluster_time    (uint64_t)-1 means the cluster time is unknown
 *   block_duration  AV_NOPTS_VALUE means "derive it from the track's
 *                   default duration"
 *
 * Returns 0 when the block was queued or deliberately skipped (payload of
 * 3 bytes or fewer, discarded stream, or still seeking to a keyframe), and
 * a negative AVERROR code on error.
 */
2165  int size, int64_t pos, uint64_t cluster_time,
2166  uint64_t block_duration, int is_keyframe,
2167  int64_t cluster_pos)
2168 {
2169  uint64_t timecode = AV_NOPTS_VALUE;
2170  MatroskaTrack *track;
2171  int res = 0;
2172  AVStream *st;
2173  int16_t block_time;
2174  uint32_t *lace_size = NULL;
2175  int n, flags, laces = 0;
2176  uint64_t num, duration;
2177 
2178  if ((n = matroska_ebmlnum_uint(matroska, data, size, &num)) < 0) {
2179  av_log(matroska->ctx, AV_LOG_ERROR, "EBML block data error\n");
2180  return n;
2181  }
2182  data += n;
2183  size -= n;
2184 
2185  track = matroska_find_track_by_num(matroska, num);
      /* NOTE(review): `size` is an int but is printed with %u below. */
2186  if (!track || !track->stream) {
2187  av_log(matroska->ctx, AV_LOG_INFO,
2188  "Invalid stream %"PRIu64" or size %u\n", num, size);
2189  return AVERROR_INVALIDDATA;
2190  } else if (size <= 3)
2191  return 0;
2192  st = track->stream;
2193  if (st->discard >= AVDISCARD_ALL)
2194  return res;
2195 
2196  block_time = AV_RB16(data);
2197  data += 2;
2198  flags = *data++;
2199  size -= 3;
2200  if (is_keyframe == -1)
2201  is_keyframe = flags & 0x80 ? AV_PKT_FLAG_KEY : 0;
2202 
      /* A timecode is only computed when the cluster time is known and a
       * negative block offset would not take it below zero. */
2203  if (cluster_time != (uint64_t)-1
2204  && (block_time >= 0 || cluster_time >= -block_time)) {
2205  timecode = cluster_time + block_time - track->codec_delay;
2206  if (track->type == MATROSKA_TRACK_TYPE_SUBTITLE
2207  && timecode < track->end_timecode)
2208  is_keyframe = 0; /* overlapping subtitles are not key frame */
2209  if (is_keyframe)
2210  av_add_index_entry(st, cluster_pos, timecode, 0,0,AVINDEX_KEYFRAME);
2211  }
2212 
      /* While skipping to a keyframe, drop non-subtitle blocks until a
       * keyframe at or after skip_to_timecode shows up. */
2213  if (matroska->skip_to_keyframe && track->type != MATROSKA_TRACK_TYPE_SUBTITLE) {
2214  if (!is_keyframe || timecode < matroska->skip_to_timecode)
2215  return res;
2216  matroska->skip_to_keyframe = 0;
2217  }
2218 
      /* Bits 1-2 of the flags byte select the lacing type. */
2219  res = matroska_parse_laces(matroska, &data, &size, (flags & 0x06) >> 1,
2220  &lace_size, &laces);
2221 
2222  if (res)
2223  goto end;
2224 
      /* NOTE(review): the warning below does not end in a newline. */
2225  if (block_duration != AV_NOPTS_VALUE) {
2226  duration = block_duration / laces;
2227  if (block_duration != duration * laces) {
2228  av_log(matroska->ctx, AV_LOG_WARNING,
2229  "Incorrect block_duration, possibly corrupted container");
2230  }
2231  } else {
2232  duration = track->default_duration / matroska->time_scale;
2233  block_duration = duration * laces;
2234  }
2235 
2236  if (timecode != AV_NOPTS_VALUE)
2237  track->end_timecode =
2238  FFMAX(track->end_timecode, timecode + block_duration);
2239 
2240  for (n = 0; n < laces; n++) {
2241  if ((st->codec->codec_id == AV_CODEC_ID_RA_288 ||
2242  st->codec->codec_id == AV_CODEC_ID_COOK ||
2243  st->codec->codec_id == AV_CODEC_ID_SIPR ||
2244  st->codec->codec_id == AV_CODEC_ID_ATRAC3) &&
2245  st->codec->block_align && track->audio.sub_packet_size) {
2246 
2247  res = matroska_parse_rm_audio(matroska, track, st, data,
2248  lace_size[n],
2249  timecode, duration, pos);
2250  if (res)
2251  goto end;
2252 
2253  } else {
      /* Only the first lace of a block keeps the keyframe flag. */
2254  res = matroska_parse_frame(matroska, track, st, data, lace_size[n],
2255  timecode, duration,
2256  pos, !n? is_keyframe : 0);
2257  if (res)
2258  goto end;
2259  }
2260 
      /* Later laces get consecutive timecodes, or none when the per-lace
       * duration is 0. */
2261  if (timecode != AV_NOPTS_VALUE)
2262  timecode = duration ? timecode + duration : AV_NOPTS_VALUE;
2263  data += lace_size[n];
2264  }
2265 
2266 end:
2267  av_free(lace_size);
2268  return res;
2269 }
2270 
2272 {
2273  EbmlList *blocks_list;
2274  MatroskaBlock *blocks;
2275  int i, res;
2276  res = ebml_parse(matroska,
2277  matroska_cluster_incremental_parsing,
2278  &matroska->current_cluster);
2279  if (res == 1) {
2280  /* New Cluster */
2281  if (matroska->current_cluster_pos)
2282  ebml_level_end(matroska);
2283  ebml_free(matroska_cluster, &matroska->current_cluster);
2284  memset(&matroska->current_cluster, 0, sizeof(MatroskaCluster));
2285  matroska->current_cluster_num_blocks = 0;
2286  matroska->current_cluster_pos = avio_tell(matroska->ctx->pb);
2287  matroska->prev_pkt = NULL;
2288  /* sizeof the ID which was already read */
2289  if (matroska->current_id)
2290  matroska->current_cluster_pos -= 4;
2291  res = ebml_parse(matroska,
2292  matroska_clusters_incremental,
2293  &matroska->current_cluster);
2294  /* Try parsing the block again. */
2295  if (res == 1)
2296  res = ebml_parse(matroska,
2297  matroska_cluster_incremental_parsing,
2298  &matroska->current_cluster);
2299  }
2300 
2301  if (!res &&
2302  matroska->current_cluster_num_blocks <
2303  matroska->current_cluster.blocks.nb_elem) {
2304  blocks_list = &matroska->current_cluster.blocks;
2305  blocks = blocks_list->elem;
2306 
2307  matroska->current_cluster_num_blocks = blocks_list->nb_elem;
2308  i = blocks_list->nb_elem - 1;
2309  if (blocks[i].bin.size > 0 && blocks[i].bin.data) {
2310  int is_keyframe = blocks[i].non_simple ? !blocks[i].reference : -1;
2311  if (!blocks[i].non_simple)
2312  blocks[i].duration = AV_NOPTS_VALUE;
2313  res = matroska_parse_block(matroska,
2314  blocks[i].bin.data, blocks[i].bin.size,
2315  blocks[i].bin.pos,
2316  matroska->current_cluster.timecode,
2317  blocks[i].duration, is_keyframe,
2318  matroska->current_cluster_pos);
2319  }
2320  }
2321 
2322  if (res < 0) matroska->done = 1;
2323  return res;
2324 }
2325 
2327 {
2328  MatroskaCluster cluster = { 0 };
2329  EbmlList *blocks_list;
2330  MatroskaBlock *blocks;
2331  int i, res;
2332  int64_t pos;
2333  if (!matroska->contains_ssa)
2334  return matroska_parse_cluster_incremental(matroska);
2335  pos = avio_tell(matroska->ctx->pb);
2336  matroska->prev_pkt = NULL;
2337  if (matroska->current_id)
2338  pos -= 4; /* sizeof the ID which was already read */
2339  res = ebml_parse(matroska, matroska_clusters, &cluster);
2340  blocks_list = &cluster.blocks;
2341  blocks = blocks_list->elem;
2342  for (i=0; i<blocks_list->nb_elem && !res; i++)
2343  if (blocks[i].bin.size > 0 && blocks[i].bin.data) {
2344  int is_keyframe = blocks[i].non_simple ? !blocks[i].reference : -1;
2345  if (!blocks[i].non_simple)
2346  blocks[i].duration = AV_NOPTS_VALUE;
2347  res=matroska_parse_block(matroska,
2348  blocks[i].bin.data, blocks[i].bin.size,
2349  blocks[i].bin.pos, cluster.timecode,
2350  blocks[i].duration, is_keyframe,
2351  pos);
2352  }
2353  ebml_free(matroska_cluster, &cluster);
2354  return res;
2355 }
2356 
2358 {
2359  MatroskaDemuxContext *matroska = s->priv_data;
2360  int ret = 0;
2361 
2362  while (!ret && matroska_deliver_packet(matroska, pkt)) {
2363  int64_t pos = avio_tell(matroska->ctx->pb);
2364  if (matroska->done)
2365  return AVERROR_EOF;
2366  if (matroska_parse_cluster(matroska) < 0)
2367  ret = matroska_resync(matroska, pos);
2368  }
2369 
2370  if (ret == AVERROR_INVALIDDATA && pkt->data) {
2371  pkt->flags |= AV_PKT_FLAG_CORRUPT;
2372  return 0;
2373  }
2374 
2375  return ret;
2376 }
2377 
2378 static int matroska_read_seek(AVFormatContext *s, int stream_index,
2379  int64_t timestamp, int flags)
2380 {
2381  MatroskaDemuxContext *matroska = s->priv_data;
2382  MatroskaTrack *tracks = NULL;
2383  AVStream *st = s->streams[stream_index];
2384  int i, index, index_sub, index_min;
2385 
2386  /* Parse the CUES now since we need the index data to seek. */
2387  if (matroska->cues_parsing_deferred) {
2388  matroska_parse_cues(matroska);
2389  matroska->cues_parsing_deferred = 0;
2390  }
2391 
2392  if (!st->nb_index_entries)
2393  return 0;
2394  timestamp = FFMAX(timestamp, st->index_entries[0].timestamp);
2395 
2396  if ((index = av_index_search_timestamp(st, timestamp, flags)) < 0) {
2397  avio_seek(s->pb, st->index_entries[st->nb_index_entries-1].pos, SEEK_SET);
2398  matroska->current_id = 0;
2399  while ((index = av_index_search_timestamp(st, timestamp, flags)) < 0) {
2400  matroska_clear_queue(matroska);
2401  if (matroska_parse_cluster(matroska) < 0)
2402  break;
2403  }
2404  }
2405 
2406  matroska_clear_queue(matroska);
2407  if (index < 0)
2408  return 0;
2409 
2410  index_min = index;
2411  tracks = matroska->tracks.elem;
2412  for (i=0; i < matroska->tracks.nb_elem; i++) {
2413  tracks[i].audio.pkt_cnt = 0;
2414  tracks[i].audio.sub_packet_cnt = 0;
2415  tracks[i].audio.buf_timecode = AV_NOPTS_VALUE;
2416  tracks[i].end_timecode = 0;
2417  if (tracks[i].type == MATROSKA_TRACK_TYPE_SUBTITLE
2418  && !tracks[i].stream->discard != AVDISCARD_ALL) {
2419  index_sub = av_index_search_timestamp(tracks[i].stream, st->index_entries[index].timestamp, AVSEEK_FLAG_BACKWARD);
2420  if (index_sub >= 0
2421  && st->index_entries[index_sub].pos < st->index_entries[index_min].pos
2422  && st->index_entries[index].timestamp - st->index_entries[index_sub].timestamp < 30000000000/matroska->time_scale)
2423  index_min = index_sub;
2424  }
2425  }
2426 
2427  avio_seek(s->pb, st->index_entries[index_min].pos, SEEK_SET);
2428  matroska->current_id = 0;
2429  matroska->skip_to_keyframe = !(flags & AVSEEK_FLAG_ANY);
2430  matroska->skip_to_timecode = st->index_entries[index].timestamp;
2431  matroska->done = 0;
2432  ff_update_cur_dts(s, st, st->index_entries[index].timestamp);
2433  return 0;
2434 }
2435 
2437 {
2438  MatroskaDemuxContext *matroska = s->priv_data;
2439  MatroskaTrack *tracks = matroska->tracks.elem;
2440  int n;
2441 
2442  matroska_clear_queue(matroska);
2443 
2444  for (n=0; n < matroska->tracks.nb_elem; n++)
2445  if (tracks[n].type == MATROSKA_TRACK_TYPE_AUDIO)
2446  av_free(tracks[n].audio.buf);
2447  ebml_free(matroska_cluster, &matroska->current_cluster);
2448  ebml_free(matroska_segment, matroska);
2449 
2450  return 0;
2451 }
2452 
/* NOTE(review): the opening line of this initializer and several of its
 * fields are not visible in this listing; only the visible fields are
 * described here. */
/* Name string of the demuxer; it lists both "matroska" and "webm". */
2454  .name = "matroska,webm",
/* Descriptive name; presumably compiled out in small builds, going by the
 * macro name -- confirm against the NULL_IF_CONFIG_SMALL definition. */
2455  .long_name = NULL_IF_CONFIG_SMALL("Matroska / WebM"),
/* Size of the private context that the functions above access through
 * s->priv_data. */
2456  .priv_data_size = sizeof(MatroskaDemuxContext),
2462 };