/* Libav — h264.c (decoder source recovered from a Doxygen HTML listing;
 * some original lines were lost in extraction and are flagged with
 * NOTE(review) markers below — restore them from upstream before building). */
1 /*
2  * H.26L/H.264/AVC/JVT/14496-10/... decoder
3  * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
4  *
5  * This file is part of Libav.
6  *
7  * Libav is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * Libav is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with Libav; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
28 #include "libavutil/avassert.h"
29 #include "libavutil/imgutils.h"
30 #include "libavutil/stereo3d.h"
31 #include "libavutil/timer.h"
32 #include "internal.h"
33 #include "cabac.h"
34 #include "cabac_functions.h"
35 #include "dsputil.h"
36 #include "error_resilience.h"
37 #include "avcodec.h"
38 #include "mpegvideo.h"
39 #include "h264.h"
40 #include "h264data.h"
41 #include "h264chroma.h"
42 #include "h264_mvpred.h"
43 #include "golomb.h"
44 #include "mathops.h"
45 #include "rectangle.h"
46 #include "svq3.h"
47 #include "thread.h"
48 
49 #include <assert.h>
50 
/* Per-macroblock coded sample counts for the four chroma formats
 * (presumably indexed by chroma_format_idc: mono=256, 4:2:0=384,
 * 4:2:2=512, 4:4:4=768 — TODO confirm against callers). */
const uint16_t ff_h264_mb_sizes[4] = { 256, 384, 512, 768 };
52 
/* rem6[q] == q % 6 for 0 <= q <= QP_MAX_NUM — lookup table so the
 * dequant setup does not need a runtime modulo. */
static const uint8_t rem6[QP_MAX_NUM + 1] = {
    0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2,
    3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
};
58 
/* div6[q] == q / 6 for 0 <= q <= QP_MAX_NUM — companion table to rem6. */
static const uint8_t div6[QP_MAX_NUM + 1] = {
    0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3,
    3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6,
    7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10,
};
64 
/* 4x4 coefficient scan order for field (interlaced) macroblocks;
 * each entry encodes a position as column + row * 4. */
static const uint8_t field_scan[16] = {
    0 + 0 * 4, 0 + 1 * 4, 1 + 0 * 4, 0 + 2 * 4,
    0 + 3 * 4, 1 + 1 * 4, 1 + 2 * 4, 1 + 3 * 4,
    2 + 0 * 4, 2 + 1 * 4, 2 + 2 * 4, 2 + 3 * 4,
    3 + 0 * 4, 3 + 1 * 4, 3 + 2 * 4, 3 + 3 * 4,
};
71 
/* 8x8 coefficient scan order for field macroblocks;
 * each entry encodes a position as column + row * 8. */
static const uint8_t field_scan8x8[64] = {
    0 + 0 * 8, 0 + 1 * 8, 0 + 2 * 8, 1 + 0 * 8,
    1 + 1 * 8, 0 + 3 * 8, 0 + 4 * 8, 1 + 2 * 8,
    2 + 0 * 8, 1 + 3 * 8, 0 + 5 * 8, 0 + 6 * 8,
    0 + 7 * 8, 1 + 4 * 8, 2 + 1 * 8, 3 + 0 * 8,
    2 + 2 * 8, 1 + 5 * 8, 1 + 6 * 8, 1 + 7 * 8,
    2 + 3 * 8, 3 + 1 * 8, 4 + 0 * 8, 3 + 2 * 8,
    2 + 4 * 8, 2 + 5 * 8, 2 + 6 * 8, 2 + 7 * 8,
    3 + 3 * 8, 4 + 1 * 8, 5 + 0 * 8, 4 + 2 * 8,
    3 + 4 * 8, 3 + 5 * 8, 3 + 6 * 8, 3 + 7 * 8,
    4 + 3 * 8, 5 + 1 * 8, 6 + 0 * 8, 5 + 2 * 8,
    4 + 4 * 8, 4 + 5 * 8, 4 + 6 * 8, 4 + 7 * 8,
    5 + 3 * 8, 6 + 1 * 8, 6 + 2 * 8, 5 + 4 * 8,
    5 + 5 * 8, 5 + 6 * 8, 5 + 7 * 8, 6 + 3 * 8,
    7 + 0 * 8, 7 + 1 * 8, 6 + 4 * 8, 6 + 5 * 8,
    6 + 6 * 8, 6 + 7 * 8, 7 + 2 * 8, 7 + 3 * 8,
    7 + 4 * 8, 7 + 5 * 8, 7 + 6 * 8, 7 + 7 * 8,
};
90 
/* field_scan8x8 reordered for CAVLC decoding (presumably
 * field_scan8x8[(i/4) + 16*(i%4)], analogous to zigzag_scan8x8_cavlc
 * below — TODO confirm); entries encode column + row * 8. */
static const uint8_t field_scan8x8_cavlc[64] = {
    0 + 0 * 8, 1 + 1 * 8, 2 + 0 * 8, 0 + 7 * 8,
    2 + 2 * 8, 2 + 3 * 8, 2 + 4 * 8, 3 + 3 * 8,
    3 + 4 * 8, 4 + 3 * 8, 4 + 4 * 8, 5 + 3 * 8,
    5 + 5 * 8, 7 + 0 * 8, 6 + 6 * 8, 7 + 4 * 8,
    0 + 1 * 8, 0 + 3 * 8, 1 + 3 * 8, 1 + 4 * 8,
    1 + 5 * 8, 3 + 1 * 8, 2 + 5 * 8, 4 + 1 * 8,
    3 + 5 * 8, 5 + 1 * 8, 4 + 5 * 8, 6 + 1 * 8,
    5 + 6 * 8, 7 + 1 * 8, 6 + 7 * 8, 7 + 5 * 8,
    0 + 2 * 8, 0 + 4 * 8, 0 + 5 * 8, 2 + 1 * 8,
    1 + 6 * 8, 4 + 0 * 8, 2 + 6 * 8, 5 + 0 * 8,
    3 + 6 * 8, 6 + 0 * 8, 4 + 6 * 8, 6 + 2 * 8,
    5 + 7 * 8, 6 + 4 * 8, 7 + 2 * 8, 7 + 6 * 8,
    1 + 0 * 8, 1 + 2 * 8, 0 + 6 * 8, 3 + 0 * 8,
    1 + 7 * 8, 3 + 2 * 8, 2 + 7 * 8, 4 + 2 * 8,
    3 + 7 * 8, 5 + 2 * 8, 4 + 7 * 8, 5 + 4 * 8,
    6 + 3 * 8, 6 + 5 * 8, 7 + 3 * 8, 7 + 7 * 8,
};
109 
// zigzag_scan8x8_cavlc[i] = zigzag_scan8x8[(i/4) + 16*(i%4)]
/* Zigzag 8x8 scan reordered for CAVLC; entries encode column + row * 8. */
static const uint8_t zigzag_scan8x8_cavlc[64] = {
    0 + 0 * 8, 1 + 1 * 8, 1 + 2 * 8, 2 + 2 * 8,
    4 + 1 * 8, 0 + 5 * 8, 3 + 3 * 8, 7 + 0 * 8,
    3 + 4 * 8, 1 + 7 * 8, 5 + 3 * 8, 6 + 3 * 8,
    2 + 7 * 8, 6 + 4 * 8, 5 + 6 * 8, 7 + 5 * 8,
    1 + 0 * 8, 2 + 0 * 8, 0 + 3 * 8, 3 + 1 * 8,
    3 + 2 * 8, 0 + 6 * 8, 4 + 2 * 8, 6 + 1 * 8,
    2 + 5 * 8, 2 + 6 * 8, 6 + 2 * 8, 5 + 4 * 8,
    3 + 7 * 8, 7 + 3 * 8, 4 + 7 * 8, 7 + 6 * 8,
    0 + 1 * 8, 3 + 0 * 8, 0 + 4 * 8, 4 + 0 * 8,
    2 + 3 * 8, 1 + 5 * 8, 5 + 1 * 8, 5 + 2 * 8,
    1 + 6 * 8, 3 + 5 * 8, 7 + 1 * 8, 4 + 5 * 8,
    4 + 6 * 8, 7 + 4 * 8, 5 + 7 * 8, 6 + 7 * 8,
    0 + 2 * 8, 2 + 1 * 8, 1 + 3 * 8, 5 + 0 * 8,
    1 + 4 * 8, 2 + 4 * 8, 6 + 0 * 8, 4 + 3 * 8,
    0 + 7 * 8, 4 + 4 * 8, 7 + 2 * 8, 3 + 6 * 8,
    5 + 5 * 8, 6 + 5 * 8, 6 + 6 * 8, 7 + 7 * 8,
};
129 
/* Base 4x4 dequant scale factors, one row per qp%6; the three columns
 * presumably cover the three distinct coefficient classes of the 4x4
 * dequant matrix — TODO confirm against the table-building code. */
static const uint8_t dequant4_coeff_init[6][3] = {
    { 10, 13, 16 },
    { 11, 14, 18 },
    { 13, 16, 20 },
    { 14, 18, 23 },
    { 16, 20, 25 },
    { 18, 23, 29 },
};
138 
/* Maps each of the 16 position classes of an 8x8 block to one of the
 * six columns of dequant8_coeff_init below (presumably — the consumer
 * is outside this chunk; TODO confirm). */
static const uint8_t dequant8_coeff_init_scan[16] = {
    0, 3, 4, 3, 3, 1, 5, 1, 4, 5, 2, 5, 3, 1, 5, 1
};
142 
/* Base 8x8 dequant scale factors, one row per qp%6, six distinct
 * coefficient classes per row (selected via dequant8_coeff_init_scan). */
static const uint8_t dequant8_coeff_init[6][6] = {
    { 20, 18, 32, 19, 25, 24 },
    { 22, 19, 35, 21, 28, 26 },
    { 26, 23, 42, 24, 33, 31 },
    { 28, 25, 45, 26, 35, 33 },
    { 32, 28, 51, 30, 40, 38 },
    { 36, 32, 58, 34, 46, 43 },
};
151 
153 #if CONFIG_H264_DXVA2_HWACCEL
155 #endif
156 #if CONFIG_H264_VAAPI_HWACCEL
158 #endif
159 #if CONFIG_H264_VDA_HWACCEL
161 #endif
162 #if CONFIG_H264_VDPAU_HWACCEL
164 #endif
167 };
168 
170 #if CONFIG_H264_DXVA2_HWACCEL
172 #endif
173 #if CONFIG_H264_VAAPI_HWACCEL
175 #endif
176 #if CONFIG_H264_VDA_HWACCEL
178 #endif
179 #if CONFIG_H264_VDPAU_HWACCEL
181 #endif
184 };
185 
/* Callback handed to the shared error-resilience code: fills the caches
 * for one concealed macroblock from the ER-supplied ref/MV and decodes it.
 * The unused mv_dir/mv_type/mb_intra/mb_skipped parameters match the ER
 * callback signature (presumably — declared in error_resilience.h). */
static void h264_er_decode_mb(void *opaque, int ref, int mv_dir, int mv_type,
                              int (*mv)[2][4][2],
                              int mb_x, int mb_y, int mb_intra, int mb_skipped)
{
    H264Context *h = opaque;

    /* Point the context at the macroblock being concealed. */
    h->mb_x  = mb_x;
    h->mb_y  = mb_y;
    h->mb_xy = mb_x + mb_y * h->mb_stride;
    memset(h->non_zero_count_cache, 0, sizeof(h->non_zero_count_cache));
    assert(ref >= 0);
    /* FIXME: It is possible albeit uncommon that slice references
     * differ between slices. We take the easy approach and ignore
     * it for now. If this turns out to have any relevance in
     * practice then correct remapping should be added. */
    if (ref >= h->ref_count[0])
        ref = 0;
    /* Propagate the single list-0 ref and MV to every 4x4 position. */
    fill_rectangle(&h->cur_pic.ref_index[0][4 * h->mb_xy],
                   2, 2, 2, ref, 1);
    fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1);
    fill_rectangle(h->mv_cache[0][scan8[0]], 4, 4, 8,
                   pack16to32((*mv)[0][0][0], (*mv)[0][0][1]), 4);
    assert(!FRAME_MBAFF(h));
    /* NOTE(review): extraction dropped one original source line here
     * (the call that actually reconstructs the MB) — restore from upstream. */
}
211 
/* NOTE(review): the signature line was lost in extraction — upstream this
 * is a draw-horiz-band helper taking (H264Context *h, int y, int height);
 * `h`, `y` and `height` are all used below.  Restore before building.
 * Invokes the user's draw_horiz_band callback for the slice of rows
 * [y, y+height) once they are decoded. */
{
    AVCodecContext *avctx = h->avctx;
    Picture *cur  = &h->cur_pic;
    Picture *last = h->ref_list[0][0].f.data[0] ? &h->ref_list[0][0] : NULL;
    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(avctx->pix_fmt);
    int vshift = desc->log2_chroma_h;   /* chroma vertical subsampling shift */
    const int field_pic = h->picture_structure != PICT_FRAME;

    /* Field pictures address every other frame line; convert to frame coords. */
    if (field_pic) {
        height <<= 1;
        y      <<= 1;
    }

    height = FFMIN(height, avctx->height - y);   /* clip to picture bottom */

    if (field_pic && h->first_field && !(avctx->slice_flags & SLICE_FLAG_ALLOW_FIELD))
        return;

    if (avctx->draw_horiz_band) {
        AVFrame *src;
        int offset[AV_NUM_DATA_POINTERS];
        int i;

        /* Draw from the current frame if it is output-ready, otherwise
         * from the last reference frame. */
        if (cur->f.pict_type == AV_PICTURE_TYPE_B || h->low_delay ||
            /* NOTE(review): one condition line of this `if` was lost in
             * extraction — restore from upstream. */
            src = &cur->f;
        else if (last)
            src = &last->f;
        else
            return;

        /* Per-plane byte offsets of row y. */
        offset[0] = y * src->linesize[0];
        offset[1] =
        offset[2] = (y >> vshift) * src->linesize[1];
        for (i = 3; i < AV_NUM_DATA_POINTERS; i++)
            offset[i] = 0;

        emms_c();   /* clear MMX/FPU state before calling back into user code */

        avctx->draw_horiz_band(avctx, src, offset,
                               y, h->picture_structure, height);
    }
}
255 
/* Release all buffers held by *pic and zero its bookkeeping fields;
 * the struct members up to and including `tf` are preserved, everything
 * after that offset is cleared. */
static void unref_picture(H264Context *h, Picture *pic)
{
    int off = offsetof(Picture, tf) + sizeof(pic->tf);
    int i;

    if (!pic->f.buf[0])   /* nothing allocated for this picture */
        return;

    ff_thread_release_buffer(h->avctx, &pic->tf);
    /* NOTE(review): extraction dropped the av_buffer_unref() calls for the
     * other side-data buffers (qscale/mb_type/motion_val/hwaccel) that
     * upstream has here and inside the loop — restore before building. */
    for (i = 0; i < 2; i++) {
        av_buffer_unref(&pic->ref_index_buf[i]);
    }

    memset((uint8_t*)pic + off, 0, sizeof(*pic) - off);
}
276 
277 static void release_unused_pictures(H264Context *h, int remove_current)
278 {
279  int i;
280 
281  /* release non reference frames */
282  for (i = 0; i < MAX_PICTURE_COUNT; i++) {
283  if (h->DPB[i].f.buf[0] && !h->DPB[i].reference &&
284  (remove_current || &h->DPB[i] != h->cur_pic_ptr)) {
285  unref_picture(h, &h->DPB[i]);
286  }
287  }
288 }
289 
/* Make `dst` an additional reference to the frame held by `src`: the
 * underlying AVBuffers are ref'ed (not copied) and the scalar metadata
 * is duplicated.  Returns 0 or a negative AVERROR; on failure `dst` is
 * fully unref'ed. */
static int ref_picture(H264Context *h, Picture *dst, Picture *src)
{
    int ret, i;

    av_assert0(!dst->f.buf[0]);
    av_assert0(src->f.buf[0]);

    src->tf.f = &src->f;
    dst->tf.f = &dst->f;
    ret = ff_thread_ref_frame(&dst->tf, &src->tf);
    if (ret < 0)
        goto fail;

    /* NOTE(review): the av_buffer_ref() assignments for qscale_table_buf
     * and mb_type_buf were lost in extraction — without them the check
     * below always fails.  Restore from upstream. */
    if (!dst->qscale_table_buf || !dst->mb_type_buf)
        goto fail;
    dst->qscale_table = src->qscale_table;
    dst->mb_type      = src->mb_type;

    for (i = 0; i < 2; i++) {
        dst->motion_val_buf[i] = av_buffer_ref(src->motion_val_buf[i]);
        dst->ref_index_buf[i]  = av_buffer_ref(src->ref_index_buf[i]);
        if (!dst->motion_val_buf[i] || !dst->ref_index_buf[i])
            goto fail;
        dst->motion_val[i] = src->motion_val[i];
        dst->ref_index[i]  = src->ref_index[i];
    }

    if (src->hwaccel_picture_private) {
        /* NOTE(review): the hwaccel_priv_buf ref and the
         * hwaccel_picture_private assignment were lost in extraction. */
        if (!dst->hwaccel_priv_buf)
            goto fail;
    }

    for (i = 0; i < 2; i++)
        dst->field_poc[i] = src->field_poc[i];

    memcpy(dst->ref_poc,   src->ref_poc,   sizeof(src->ref_poc));
    memcpy(dst->ref_count, src->ref_count, sizeof(src->ref_count));

    /* Plain-value metadata copy. */
    dst->poc           = src->poc;
    dst->frame_num     = src->frame_num;
    dst->mmco_reset    = src->mmco_reset;
    dst->pic_id        = src->pic_id;
    dst->long_ref      = src->long_ref;
    dst->mbaff         = src->mbaff;
    dst->field_picture = src->field_picture;
    dst->needs_realloc = src->needs_realloc;
    dst->reference     = src->reference;
    dst->recovered     = src->recovered;

    return 0;
fail:
    unref_picture(h, dst);
    return ret;
}
348 
/* Allocate the scratch buffers used by motion compensation and edge
 * emulation, sized from the (possibly negative) luma linesize.
 * Idempotent: returns 0 immediately if already allocated.
 * Returns 0 or AVERROR(ENOMEM). */
static int alloc_scratch_buffers(H264Context *h, int linesize)
{
    int alloc_size = FFALIGN(FFABS(linesize) + 32, 32);

    if (h->bipred_scratchpad)
        return 0;

    h->bipred_scratchpad = av_malloc(16 * 6 * alloc_size);
    // edge emu needs blocksize + filter length - 1
    // (= 21x21 for h264)
    h->edge_emu_buffer = av_mallocz(alloc_size * 2 * 21);
    h->me.scratchpad   = av_mallocz(alloc_size * 2 * 16 * 2);

    if (!h->bipred_scratchpad || !h->edge_emu_buffer || !h->me.scratchpad) {
        /* NOTE(review): the frees of bipred_scratchpad and edge_emu_buffer
         * were lost in extraction — as scraped this path leaks them. */
        av_freep(&h->me.scratchpad);
        return AVERROR(ENOMEM);
    }

    h->me.temp = h->me.scratchpad;   /* temp aliases the ME scratchpad */

    return 0;
}
373 
/* NOTE(review): the signature was lost in extraction — upstream this is
 * static int init_table_pools(H264Context *h): create the shared buffer
 * pools for the per-picture side-data tables.  Returns 0 or
 * AVERROR(ENOMEM). */
{
    const int big_mb_num    = h->mb_stride * (h->mb_height + 1) + 1;
    const int mb_array_size = h->mb_stride * h->mb_height;
    const int b4_stride     = h->mb_width * 4 + 1;
    const int b4_array_size = b4_stride * h->mb_height * 4;

    h->qscale_table_pool = av_buffer_pool_init(big_mb_num + h->mb_stride,
    /* NOTE(review): the allocator argument line of this call was lost
     * in extraction. */
    h->mb_type_pool      = av_buffer_pool_init((big_mb_num + h->mb_stride) *
                                               sizeof(uint32_t), av_buffer_allocz);
    h->motion_val_pool   = av_buffer_pool_init(2 * (b4_array_size + 4) *
                                               sizeof(int16_t), av_buffer_allocz);
    h->ref_index_pool    = av_buffer_pool_init(4 * mb_array_size, av_buffer_allocz);

    if (!h->qscale_table_pool || !h->mb_type_pool || !h->motion_val_pool ||
        !h->ref_index_pool) {
        /* NOTE(review): the pool uninit calls on this error path were lost
         * in extraction — restore them to avoid leaking the pools that
         * did allocate. */
        return AVERROR(ENOMEM);
    }

    return 0;
}
400 
/* Allocate the frame buffer and per-picture side-data tables for *pic.
 * Returns 0 on success, a negative AVERROR on failure (the picture is
 * unref'ed on the fail path). */
static int alloc_picture(H264Context *h, Picture *pic)
{
    int i, ret = 0;

    av_assert0(!pic->f.data[0]);

    pic->tf.f = &pic->f;
    ret = ff_thread_get_buffer(h->avctx, &pic->tf, pic->reference ?
    /* NOTE(review): the flags argument line of this call was lost in
     * extraction. */
    if (ret < 0)
        goto fail;

    /* Cache the strides the decoder uses everywhere. */
    h->linesize   = pic->f.linesize[0];
    h->uvlinesize = pic->f.linesize[1];

    if (h->avctx->hwaccel) {
        const AVHWAccel *hwaccel = h->avctx->hwaccel;
        /* NOTE(review): the hwaccel private-buffer allocation lines were
         * lost in extraction. */
        if (hwaccel->priv_data_size) {
            if (!pic->hwaccel_priv_buf)
                return AVERROR(ENOMEM);
        }
    }

    if (!h->qscale_table_pool) {   /* lazily create the shared pools */
        ret = init_table_pools(h);
        if (ret < 0)
            goto fail;
    }

    /* NOTE(review): the av_buffer_pool_get() calls filling
     * qscale_table_buf and mb_type_buf were lost in extraction. */
    if (!pic->qscale_table_buf || !pic->mb_type_buf)
        goto fail;

    /* +2*mb_stride+1: offset into the table presumably skipping a border
     * row so edge-neighbour accesses stay in bounds — TODO confirm. */
    pic->mb_type      = (uint32_t*)pic->mb_type_buf->data + 2 * h->mb_stride + 1;
    pic->qscale_table = pic->qscale_table_buf->data + 2 * h->mb_stride + 1;

    for (i = 0; i < 2; i++) {
        /* NOTE(review): the pool-get calls filling motion_val_buf[i] and
         * ref_index_buf[i] were lost in extraction. */
        if (!pic->motion_val_buf[i] || !pic->ref_index_buf[i])
            goto fail;

        pic->motion_val[i] = (int16_t (*)[2])pic->motion_val_buf[i]->data + 4;
        pic->ref_index[i]  = pic->ref_index_buf[i]->data;
    }

    return 0;
fail:
    unref_picture(h, pic);
    return (ret < 0) ? ret : AVERROR(ENOMEM);
}
456 
457 static inline int pic_is_unused(H264Context *h, Picture *pic)
458 {
459  if (!pic->f.buf[0])
460  return 1;
461  if (pic->needs_realloc && !(pic->reference & DELAYED_PIC_REF))
462  return 1;
463  return 0;
464 }
465 
/* NOTE(review): the signature was lost in extraction — upstream this is
 * static int find_unused_picture(H264Context *h).  Returns the index of
 * a free DPB slot, or AVERROR_INVALIDDATA when every slot is in use. */
{
    int i;

    for (i = 0; i < MAX_PICTURE_COUNT; i++) {
        if (pic_is_unused(h, &h->DPB[i]))
            break;
    }
    if (i == MAX_PICTURE_COUNT)
        return AVERROR_INVALIDDATA;

    /* A slot that was only kept around pending reallocation is released
     * now so the caller gets a genuinely empty picture. */
    if (h->DPB[i].needs_realloc) {
        h->DPB[i].needs_realloc = 0;
        unref_picture(h, &h->DPB[i]);
    }

    return i;
}
484 
/* NOTE(review): the Doxygen comment and signature were lost in extraction —
 * upstream this is static int check_intra4x4_pred_mode(H264Context *h):
 * validate the cached intra4x4 prediction modes against the available
 * neighbour samples, substituting DC variants where a neighbour is
 * missing.  Returns 0 or AVERROR_INVALIDDATA. */
{
    /* Replacement tables indexed by the cached mode: -1 = mode invalid
     * without that neighbour, 0 = keep, otherwise the substitute mode. */
    static const int8_t top[12] = {
        -1, 0, LEFT_DC_PRED, -1, -1, -1, -1, -1, 0
    };
    static const int8_t left[12] = {
        0, -1, TOP_DC_PRED, 0, -1, -1, -1, 0, -1, DC_128_PRED
    };
    int i;

    if (!(h->top_samples_available & 0x8000)) {   /* no row above */
        for (i = 0; i < 4; i++) {
            int status = top[h->intra4x4_pred_mode_cache[scan8[0] + i]];
            if (status < 0) {
                /* NOTE(review): the av_log(...) call opening this error
                 * message was lost in extraction. */
                "top block unavailable for requested intra4x4 mode %d at %d %d\n",
                status, h->mb_x, h->mb_y);
                return AVERROR_INVALIDDATA;
            } else if (status) {
                h->intra4x4_pred_mode_cache[scan8[0] + i] = status;
            }
        }
    }

    if ((h->left_samples_available & 0x8888) != 0x8888) {   /* some left 4x4s missing */
        static const int mask[4] = { 0x8000, 0x2000, 0x80, 0x20 };
        for (i = 0; i < 4; i++)
            if (!(h->left_samples_available & mask[i])) {
                int status = left[h->intra4x4_pred_mode_cache[scan8[0] + 8 * i]];
                if (status < 0) {
                    /* NOTE(review): av_log(...) opening lost in extraction. */
                    "left block unavailable for requested intra4x4 mode %d at %d %d\n",
                    status, h->mb_x, h->mb_y);
                    return AVERROR_INVALIDDATA;
                } else if (status) {
                    h->intra4x4_pred_mode_cache[scan8[0] + 8 * i] = status;
                }
            }
    }

    return 0;
} // FIXME cleanup like ff_h264_check_intra_pred_mode
531 
/* Validate a whole-MB (chroma/16x16) intra prediction mode against the
 * available neighbour samples, substituting a DC variant where a
 * neighbour is missing.  Returns the (possibly substituted) mode or
 * AVERROR_INVALIDDATA.  NOTE(review): the original Doxygen block and the
 * av_log(...) call openers were lost in extraction. */
int ff_h264_check_intra_pred_mode(H264Context *h, int mode, int is_chroma)
{
    /* Substitutes when top/left samples are unavailable; -1 = invalid. */
    static const int8_t top[4]  = { LEFT_DC_PRED8x8, 1, -1, -1 };
    static const int8_t left[5] = { TOP_DC_PRED8x8, -1, 2, -1, DC_128_PRED8x8 };

    if (mode > 3U) {   /* unsigned compare also rejects negative modes */
        /* NOTE(review): av_log(...) opening lost in extraction. */
        "out of range intra chroma pred mode at %d %d\n",
        h->mb_x, h->mb_y);
        return AVERROR_INVALIDDATA;
    }

    if (!(h->top_samples_available & 0x8000)) {
        mode = top[mode];
        if (mode < 0) {
            /* NOTE(review): av_log(...) opening lost in extraction. */
            "top block unavailable for requested intra mode at %d %d\n",
            h->mb_x, h->mb_y);
            return AVERROR_INVALIDDATA;
        }
    }

    if ((h->left_samples_available & 0x8080) != 0x8080) {
        mode = left[mode];
        if (is_chroma && (h->left_samples_available & 0x8080)) {
            // mad cow disease mode, aka MBAFF + constrained_intra_pred
            mode = ALZHEIMER_DC_L0T_PRED8x8 +
                   (!(h->left_samples_available & 0x8000)) +
                   2 * (mode == DC_128_PRED8x8);
        }
        if (mode < 0) {
            /* NOTE(review): av_log(...) opening lost in extraction. */
            "left block unavailable for requested intra mode at %d %d\n",
            h->mb_x, h->mb_y);
            return AVERROR_INVALIDDATA;
        }
    }

    return mode;
}
576 
/* NOTE(review): the first signature line was lost in extraction — upstream
 * this is ff_h264_decode_nal(H264Context *h, const uint8_t *src, ...):
 * parse the NAL header byte and undo the 00 00 03 emulation-prevention
 * escaping, returning a pointer to the unescaped payload (either `src`
 * itself when no escapes are present, or an internal rbsp buffer). */
                               int *dst_length, int *consumed, int length)
{
    int i, si, di;
    uint8_t *dst;
    int bufidx;

    // src[0]&0x80; // forbidden bit
    h->nal_ref_idc   = src[0] >> 5;
    h->nal_unit_type = src[0] & 0x1F;

    src++;      /* skip the NAL header byte */
    length--;

/* Terminate the scan at a 00 00 0x sequence: a real start code ends the
 * NAL; a 00 00 03 escape drops us into the unescaping path below. */
#define STARTCODE_TEST \
    if (i + 2 < length && src[i + 1] == 0 && src[i + 2] <= 3) { \
        if (src[i + 2] != 3) { \
            /* startcode, so we must be past the end */ \
            length = i; \
        } \
        break; \
    }

#if HAVE_FAST_UNALIGNED
/* After the word-at-a-time scan flagged a zero byte, step back/forward
 * to land exactly on the first zero. */
#define FIND_FIRST_ZERO \
    if (i > 0 && !src[i]) \
        i--; \
    while (src[i]) \
        i++

#if HAVE_FAST_64BIT
    /* Scan 8 bytes per iteration for any zero byte. */
    for (i = 0; i + 1 < length; i += 9) {
        if (!((~AV_RN64A(src + i) &
               (AV_RN64A(src + i) - 0x0100010001000101ULL)) &
              0x8000800080008080ULL))
            continue;
        FIND_FIRST_ZERO;
        /* NOTE(review): a STARTCODE_TEST; line was lost in extraction here. */
        i -= 7;
    }
#else
    /* 32-bit variant: scan 4 bytes per iteration. */
    for (i = 0; i + 1 < length; i += 5) {
        if (!((~AV_RN32A(src + i) &
               (AV_RN32A(src + i) - 0x01000101U)) &
              0x80008080U))
            continue;
        FIND_FIRST_ZERO;
        /* NOTE(review): a STARTCODE_TEST; line was lost in extraction here. */
        i -= 3;
    }
#endif
#else
    /* Byte-wise fallback scan. */
    for (i = 0; i + 1 < length; i += 2) {
        if (src[i])
            continue;
        if (i > 0 && src[i - 1] == 0)
            i--;
        /* NOTE(review): a STARTCODE_TEST; line was lost in extraction here. */
    }
#endif

    if (i >= length - 1) { // no escaped 0
        *dst_length = length;
        *consumed   = length + 1; // +1 for the header
        return src;
    }

    // use second escape buffer for inter data
    bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0;
    av_fast_malloc(&h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx],
    /* NOTE(review): the size argument line of this call was lost in
     * extraction. */
    dst = h->rbsp_buffer[bufidx];

    if (dst == NULL)
        return NULL;

    /* Copy the escape-free prefix, then filter 00 00 03 sequences. */
    memcpy(dst, src, i);
    si = di = i;
    while (si + 2 < length) {
        // remove escapes (very rare 1:2^22)
        if (src[si + 2] > 3) {
            dst[di++] = src[si++];
            dst[di++] = src[si++];
        } else if (src[si] == 0 && src[si + 1] == 0) {
            if (src[si + 2] == 3) { // escape
                dst[di++] = 0;
                dst[di++] = 0;
                si       += 3;
                continue;
            } else // next start code
                goto nsc;
        }

        dst[di++] = src[si++];
    }
    while (si < length)
        dst[di++] = src[si++];

nsc:
    memset(dst + di, 0, FF_INPUT_BUFFER_PADDING_SIZE);

    *dst_length = di;
    *consumed   = si + 1; // +1 for the header
    /* FIXME store exact number of bits in the getbitcontext
     * (it is needed for decoding) */
    return dst;
}
684 
689 static int decode_rbsp_trailing(H264Context *h, const uint8_t *src)
690 {
691  int v = *src;
692  int r;
693 
694  tprintf(h->avctx, "rbsp trailing %X\n", v);
695 
696  for (r = 1; r < 9; r++) {
697  if (v & 1)
698  return r;
699  v >>= 1;
700  }
701  return 0;
702 }
703 
704 static inline int get_lowest_part_list_y(H264Context *h, Picture *pic, int n,
705  int height, int y_offset, int list)
706 {
707  int raw_my = h->mv_cache[list][scan8[n]][1];
708  int filter_height_up = (raw_my & 3) ? 2 : 0;
709  int filter_height_down = (raw_my & 3) ? 3 : 0;
710  int full_my = (raw_my >> 2) + y_offset;
711  int top = full_my - filter_height_up;
712  int bottom = full_my + filter_height_down + height;
713 
714  return FFMAX(abs(top), bottom);
715 }
716 
717 static inline void get_lowest_part_y(H264Context *h, int refs[2][48], int n,
718  int height, int y_offset, int list0,
719  int list1, int *nrefs)
720 {
721  int my;
722 
723  y_offset += 16 * (h->mb_y >> MB_FIELD(h));
724 
725  if (list0) {
726  int ref_n = h->ref_cache[0][scan8[n]];
727  Picture *ref = &h->ref_list[0][ref_n];
728 
729  // Error resilience puts the current picture in the ref list.
730  // Don't try to wait on these as it will cause a deadlock.
731  // Fields can wait on each other, though.
732  if (ref->tf.progress->data != h->cur_pic.tf.progress->data ||
733  (ref->reference & 3) != h->picture_structure) {
734  my = get_lowest_part_list_y(h, ref, n, height, y_offset, 0);
735  if (refs[0][ref_n] < 0)
736  nrefs[0] += 1;
737  refs[0][ref_n] = FFMAX(refs[0][ref_n], my);
738  }
739  }
740 
741  if (list1) {
742  int ref_n = h->ref_cache[1][scan8[n]];
743  Picture *ref = &h->ref_list[1][ref_n];
744 
745  if (ref->tf.progress->data != h->cur_pic.tf.progress->data ||
746  (ref->reference & 3) != h->picture_structure) {
747  my = get_lowest_part_list_y(h, ref, n, height, y_offset, 1);
748  if (refs[1][ref_n] < 0)
749  nrefs[1] += 1;
750  refs[1][ref_n] = FFMAX(refs[1][ref_n], my);
751  }
752  }
753 }
754 
/* NOTE(review): the signature was lost in extraction — upstream this is
 * static void await_references(H264Context *h): in frame-threaded
 * decoding, block until every reference row needed by the current MB's
 * motion vectors has been decoded in the reference threads. */
{
    const int mb_xy   = h->mb_xy;
    const int mb_type = h->cur_pic.mb_type[mb_xy];
    int refs[2][48];        /* per-list, per-ref lowest row needed (-1 = unused) */
    int nrefs[2] = { 0 };   /* distinct refs with a pending wait, per list */
    int ref, list;

    memset(refs, -1, sizeof(refs));

    /* Walk the partition layout and collect the lowest row each MV needs. */
    if (IS_16X16(mb_type)) {
        get_lowest_part_y(h, refs, 0, 16, 0,
                          IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs);
    } else if (IS_16X8(mb_type)) {
        get_lowest_part_y(h, refs, 0, 8, 0,
                          IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs);
        get_lowest_part_y(h, refs, 8, 8, 8,
                          IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1), nrefs);
    } else if (IS_8X16(mb_type)) {
        get_lowest_part_y(h, refs, 0, 16, 0,
                          IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs);
        get_lowest_part_y(h, refs, 4, 16, 0,
                          IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1), nrefs);
    } else {
        int i;

        assert(IS_8X8(mb_type));

        for (i = 0; i < 4; i++) {
            const int sub_mb_type = h->sub_mb_type[i];
            const int n           = 4 * i;
            int y_offset          = (i & 2) << 2;

            if (IS_SUB_8X8(sub_mb_type)) {
                get_lowest_part_y(h, refs, n, 8, y_offset,
                                  IS_DIR(sub_mb_type, 0, 0),
                                  IS_DIR(sub_mb_type, 0, 1),
                                  nrefs);
            } else if (IS_SUB_8X4(sub_mb_type)) {
                get_lowest_part_y(h, refs, n, 4, y_offset,
                                  IS_DIR(sub_mb_type, 0, 0),
                                  IS_DIR(sub_mb_type, 0, 1),
                                  nrefs);
                get_lowest_part_y(h, refs, n + 2, 4, y_offset + 4,
                                  IS_DIR(sub_mb_type, 0, 0),
                                  IS_DIR(sub_mb_type, 0, 1),
                                  nrefs);
            } else if (IS_SUB_4X8(sub_mb_type)) {
                get_lowest_part_y(h, refs, n, 8, y_offset,
                                  IS_DIR(sub_mb_type, 0, 0),
                                  IS_DIR(sub_mb_type, 0, 1),
                                  nrefs);
                get_lowest_part_y(h, refs, n + 1, 8, y_offset,
                                  IS_DIR(sub_mb_type, 0, 0),
                                  IS_DIR(sub_mb_type, 0, 1),
                                  nrefs);
            } else {
                int j;
                assert(IS_SUB_4X4(sub_mb_type));
                for (j = 0; j < 4; j++) {
                    int sub_y_offset = y_offset + 2 * (j & 2);
                    get_lowest_part_y(h, refs, n + j, 4, sub_y_offset,
                                      IS_DIR(sub_mb_type, 0, 0),
                                      IS_DIR(sub_mb_type, 0, 1),
                                      nrefs);
                }
            }
        }
    }

    /* Wait on each referenced picture up to the row collected above,
     * handling the frame/field progress-granularity combinations. */
    for (list = h->list_count - 1; list >= 0; list--)
        for (ref = 0; ref < 48 && nrefs[list]; ref++) {
            int row = refs[list][ref];
            if (row >= 0) {
                Picture *ref_pic        = &h->ref_list[list][ref];
                int ref_field           = ref_pic->reference - 1;
                int ref_field_picture   = ref_pic->field_picture;
                int pic_height          = 16 * h->mb_height >> ref_field_picture;

                row <<= MB_MBAFF(h);   /* MBAFF rows cover two MB rows */
                nrefs[list]--;

                if (!FIELD_PICTURE(h) && ref_field_picture) { // frame referencing two fields
                    ff_thread_await_progress(&ref_pic->tf,
                                             FFMIN((row >> 1) - !(row & 1),
                                                   pic_height - 1),
                                             1);
                    ff_thread_await_progress(&ref_pic->tf,
                                             FFMIN((row >> 1), pic_height - 1),
                                             0);
                } else if (FIELD_PICTURE(h) && !ref_field_picture) { // field referencing one field of a frame
                    ff_thread_await_progress(&ref_pic->tf,
                                             FFMIN(row * 2 + ref_field,
                                                   pic_height - 1),
                                             0);
                } else if (FIELD_PICTURE(h)) {
                    ff_thread_await_progress(&ref_pic->tf,
                                             FFMIN(row, pic_height - 1),
                                             ref_field);
                } else {
                    ff_thread_await_progress(&ref_pic->tf,
                                             FFMIN(row, pic_height - 1),
                                             0);
                }
            }
        }
}
867 
/* NOTE(review): the first signature line was lost in extraction — upstream
 * this is static av_always_inline void mc_dir_part(H264Context *h,
 * Picture *pic, ...): single-direction motion compensation for one
 * partition — quarter-pel luma plus chroma, with edge emulation when the
 * MV points (partly) outside the picture. */
                                           int n, int square, int height,
                                           int delta, int list,
                                           uint8_t *dest_y, uint8_t *dest_cb,
                                           uint8_t *dest_cr,
                                           int src_x_offset, int src_y_offset,
                                           qpel_mc_func *qpix_op,
                                           h264_chroma_mc_func chroma_op,
                                           int pixel_shift, int chroma_idc)
{
    const int mx      = h->mv_cache[list][scan8[n]][0] + src_x_offset * 8;
    int my            = h->mv_cache[list][scan8[n]][1] + src_y_offset * 8;
    const int luma_xy = (mx & 3) + ((my & 3) << 2);   /* quarter-pel phase */
    ptrdiff_t offset  = ((mx >> 2) << pixel_shift) + (my >> 2) * h->mb_linesize;
    uint8_t *src_y    = pic->f.data[0] + offset;
    uint8_t *src_cb, *src_cr;
    int extra_width   = 0;
    int extra_height  = 0;
    int emu           = 0;
    const int full_mx    = mx >> 2;   /* integer-pel MV components */
    const int full_my    = my >> 2;
    const int pic_width  = 16 * h->mb_width;
    const int pic_height = 16 * h->mb_height >> MB_FIELD(h);
    int ysh;

    /* Sub-pel interpolation reads a 3-pixel margin around the block. */
    if (mx & 7)
        extra_width -= 3;
    if (my & 7)
        extra_height -= 3;

    if (full_mx                <          0 - extra_width  ||
        full_my                <          0 - extra_height ||
        full_mx + 16 /*FIXME*/ > pic_width  + extra_width  ||
        full_my + 16 /*FIXME*/ > pic_height + extra_height) {
        /* NOTE(review): the h->vdsp.emulated_edge_mc(h->edge_emu_buffer,
         * opening line of this call was lost in extraction. */
        src_y - (2 << pixel_shift) - 2 * h->mb_linesize,
        h->mb_linesize, h->mb_linesize,
        16 + 5, 16 + 5 /*FIXME*/, full_mx - 2,
        full_my - 2, pic_width, pic_height);
        src_y = h->edge_emu_buffer + (2 << pixel_shift) + 2 * h->mb_linesize;
        emu   = 1;
    }

    qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); // FIXME try variable height perhaps?
    if (!square)
        qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);

    if (CONFIG_GRAY && h->flags & CODEC_FLAG_GRAY)
        return;   /* grayscale decode: skip chroma entirely */

    if (chroma_idc == 3 /* yuv444 */) {
        /* 4:4:4 chroma uses the luma MC functions and luma stride. */
        src_cb = pic->f.data[1] + offset;
        if (emu) {
            /* NOTE(review): emulated_edge_mc opening line lost in extraction. */
            src_cb - (2 << pixel_shift) - 2 * h->mb_linesize,
            h->mb_linesize, h->mb_linesize,
            16 + 5, 16 + 5 /*FIXME*/,
            full_mx - 2, full_my - 2,
            pic_width, pic_height);
            src_cb = h->edge_emu_buffer + (2 << pixel_shift) + 2 * h->mb_linesize;
        }
        qpix_op[luma_xy](dest_cb, src_cb, h->mb_linesize); // FIXME try variable height perhaps?
        if (!square)
            qpix_op[luma_xy](dest_cb + delta, src_cb + delta, h->mb_linesize);

        src_cr = pic->f.data[2] + offset;
        if (emu) {
            /* NOTE(review): emulated_edge_mc opening line lost in extraction. */
            src_cr - (2 << pixel_shift) - 2 * h->mb_linesize,
            h->mb_linesize, h->mb_linesize,
            16 + 5, 16 + 5 /*FIXME*/,
            full_mx - 2, full_my - 2,
            pic_width, pic_height);
            src_cr = h->edge_emu_buffer + (2 << pixel_shift) + 2 * h->mb_linesize;
        }
        qpix_op[luma_xy](dest_cr, src_cr, h->mb_linesize); // FIXME try variable height perhaps?
        if (!square)
            qpix_op[luma_xy](dest_cr + delta, src_cr + delta, h->mb_linesize);
        return;
    }

    ysh = 3 - (chroma_idc == 2 /* yuv422 */);   /* vertical chroma MV shift */
    if (chroma_idc == 1 /* yuv420 */ && MB_FIELD(h)) {
        // chroma offset when predicting from a field of opposite parity
        my  += 2 * ((h->mb_y & 1) - (pic->reference - 1));
        emu |= (my >> 3) < 0 || (my >> 3) + 8 >= (pic_height >> 1);
    }

    src_cb = pic->f.data[1] + ((mx >> 3) << pixel_shift) +
             (my >> ysh) * h->mb_uvlinesize;
    src_cr = pic->f.data[2] + ((mx >> 3) << pixel_shift) +
             (my >> ysh) * h->mb_uvlinesize;

    if (emu) {
        h->vdsp.emulated_edge_mc(h->edge_emu_buffer, src_cb,
        /* NOTE(review): the stride argument line was lost in extraction. */
        9, 8 * chroma_idc + 1, (mx >> 3), (my >> ysh),
        pic_width >> 1, pic_height >> (chroma_idc == 1 /* yuv420 */));
        src_cb = h->edge_emu_buffer;
    }
    chroma_op(dest_cb, src_cb, h->mb_uvlinesize,
              height >> (chroma_idc == 1 /* yuv420 */),
              mx & 7, (my << (chroma_idc == 2 /* yuv422 */)) & 7);

    if (emu) {
        h->vdsp.emulated_edge_mc(h->edge_emu_buffer, src_cr,
        /* NOTE(review): the stride argument line was lost in extraction. */
        9, 8 * chroma_idc + 1, (mx >> 3), (my >> ysh),
        pic_width >> 1, pic_height >> (chroma_idc == 1 /* yuv420 */));
        src_cr = h->edge_emu_buffer;
    }
    chroma_op(dest_cr, src_cr, h->mb_uvlinesize, height >> (chroma_idc == 1 /* yuv420 */),
              mx & 7, (my << (chroma_idc == 2 /* yuv422 */)) & 7);
}
982 
983 static av_always_inline void mc_part_std(H264Context *h, int n, int square,
984  int height, int delta,
985  uint8_t *dest_y, uint8_t *dest_cb,
986  uint8_t *dest_cr,
987  int x_offset, int y_offset,
988  qpel_mc_func *qpix_put,
989  h264_chroma_mc_func chroma_put,
990  qpel_mc_func *qpix_avg,
991  h264_chroma_mc_func chroma_avg,
992  int list0, int list1,
993  int pixel_shift, int chroma_idc)
994 {
995  qpel_mc_func *qpix_op = qpix_put;
996  h264_chroma_mc_func chroma_op = chroma_put;
997 
998  dest_y += (2 * x_offset << pixel_shift) + 2 * y_offset * h->mb_linesize;
999  if (chroma_idc == 3 /* yuv444 */) {
1000  dest_cb += (2 * x_offset << pixel_shift) + 2 * y_offset * h->mb_linesize;
1001  dest_cr += (2 * x_offset << pixel_shift) + 2 * y_offset * h->mb_linesize;
1002  } else if (chroma_idc == 2 /* yuv422 */) {
1003  dest_cb += (x_offset << pixel_shift) + 2 * y_offset * h->mb_uvlinesize;
1004  dest_cr += (x_offset << pixel_shift) + 2 * y_offset * h->mb_uvlinesize;
1005  } else { /* yuv420 */
1006  dest_cb += (x_offset << pixel_shift) + y_offset * h->mb_uvlinesize;
1007  dest_cr += (x_offset << pixel_shift) + y_offset * h->mb_uvlinesize;
1008  }
1009  x_offset += 8 * h->mb_x;
1010  y_offset += 8 * (h->mb_y >> MB_FIELD(h));
1011 
1012  if (list0) {
1013  Picture *ref = &h->ref_list[0][h->ref_cache[0][scan8[n]]];
1014  mc_dir_part(h, ref, n, square, height, delta, 0,
1015  dest_y, dest_cb, dest_cr, x_offset, y_offset,
1016  qpix_op, chroma_op, pixel_shift, chroma_idc);
1017 
1018  qpix_op = qpix_avg;
1019  chroma_op = chroma_avg;
1020  }
1021 
1022  if (list1) {
1023  Picture *ref = &h->ref_list[1][h->ref_cache[1][scan8[n]]];
1024  mc_dir_part(h, ref, n, square, height, delta, 1,
1025  dest_y, dest_cb, dest_cr, x_offset, y_offset,
1026  qpix_op, chroma_op, pixel_shift, chroma_idc);
1027  }
1028 }
1029 
/* NOTE(review): the first signature line was lost in extraction — upstream
 * this is static av_always_inline void mc_part_weighted(H264Context *h,
 * int n, int square, ...): motion compensation with explicit or implicit
 * weighted prediction for one partition. */
                                              int height, int delta,
                                              uint8_t *dest_y, uint8_t *dest_cb,
                                              uint8_t *dest_cr,
                                              int x_offset, int y_offset,
                                              qpel_mc_func *qpix_put,
                                              h264_chroma_mc_func chroma_put,
                                              h264_weight_func luma_weight_op,
                                              h264_weight_func chroma_weight_op,
                                              h264_biweight_func luma_weight_avg,
                                              h264_biweight_func chroma_weight_avg,
                                              int list0, int list1,
                                              int pixel_shift, int chroma_idc)
{
    int chroma_height;

    /* Advance destinations to this partition (same scheme as mc_part_std). */
    dest_y += (2 * x_offset << pixel_shift) + 2 * y_offset * h->mb_linesize;
    if (chroma_idc == 3 /* yuv444 */) {
        chroma_height     = height;
        chroma_weight_avg = luma_weight_avg;   /* chroma behaves like luma */
        chroma_weight_op  = luma_weight_op;
        dest_cb += (2 * x_offset << pixel_shift) + 2 * y_offset * h->mb_linesize;
        dest_cr += (2 * x_offset << pixel_shift) + 2 * y_offset * h->mb_linesize;
    } else if (chroma_idc == 2 /* yuv422 */) {
        chroma_height = height;
        dest_cb += (x_offset << pixel_shift) + 2 * y_offset * h->mb_uvlinesize;
        dest_cr += (x_offset << pixel_shift) + 2 * y_offset * h->mb_uvlinesize;
    } else { /* yuv420 */
        chroma_height = height >> 1;
        dest_cb += (x_offset << pixel_shift) + y_offset * h->mb_uvlinesize;
        dest_cr += (x_offset << pixel_shift) + y_offset * h->mb_uvlinesize;
    }
    x_offset += 8 * h->mb_x;
    y_offset += 8 * (h->mb_y >> MB_FIELD(h));

    if (list0 && list1) {
        /* don't optimize for luma-only case, since B-frames usually
         * use implicit weights => chroma too. */
        uint8_t *tmp_cb = h->bipred_scratchpad;
        uint8_t *tmp_cr = h->bipred_scratchpad + (16 << pixel_shift);
        uint8_t *tmp_y  = h->bipred_scratchpad + 16 * h->mb_uvlinesize;
        int refn0       = h->ref_cache[0][scan8[n]];
        int refn1       = h->ref_cache[1][scan8[n]];

        /* Predict list 0 into the destination, list 1 into scratch,
         * then blend with the weighting functions below. */
        mc_dir_part(h, &h->ref_list[0][refn0], n, square, height, delta, 0,
                    dest_y, dest_cb, dest_cr,
                    x_offset, y_offset, qpix_put, chroma_put,
                    pixel_shift, chroma_idc);
        mc_dir_part(h, &h->ref_list[1][refn1], n, square, height, delta, 1,
                    tmp_y, tmp_cb, tmp_cr,
                    x_offset, y_offset, qpix_put, chroma_put,
                    pixel_shift, chroma_idc);

        if (h->use_weight == 2) {
            /* Implicit weighting: weights derived per ref pair. */
            int weight0 = h->implicit_weight[refn0][refn1][h->mb_y & 1];
            int weight1 = 64 - weight0;
            luma_weight_avg(dest_y, tmp_y, h->mb_linesize,
                            height, 5, weight0, weight1, 0);
            chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize,
                              chroma_height, 5, weight0, weight1, 0);
            chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize,
                              chroma_height, 5, weight0, weight1, 0);
        } else {
            /* Explicit weighting.  NOTE(review): the log2_weight_denom
             * argument lines of these three calls were lost in extraction. */
            luma_weight_avg(dest_y, tmp_y, h->mb_linesize, height,
                            h->luma_weight[refn0][0][0],
                            h->luma_weight[refn1][1][0],
                            h->luma_weight[refn0][0][1] +
                            h->luma_weight[refn1][1][1]);
            chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, chroma_height,
                              h->chroma_weight[refn0][0][0][0],
                              h->chroma_weight[refn1][1][0][0],
                              h->chroma_weight[refn0][0][0][1] +
                              h->chroma_weight[refn1][1][0][1]);
            chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, chroma_height,
                              h->chroma_weight[refn0][0][1][0],
                              h->chroma_weight[refn1][1][1][0],
                              h->chroma_weight[refn0][0][1][1] +
                              h->chroma_weight[refn1][1][1][1]);
        }
    } else {
        /* Single-direction prediction, then apply the explicit weights.
         * NOTE(review): the log2_weight_denom argument lines of the
         * weight_op calls were lost in extraction. */
        int list = list1 ? 1 : 0;
        int refn = h->ref_cache[list][scan8[n]];
        Picture *ref = &h->ref_list[list][refn];
        mc_dir_part(h, ref, n, square, height, delta, list,
                    dest_y, dest_cb, dest_cr, x_offset, y_offset,
                    qpix_put, chroma_put, pixel_shift, chroma_idc);

        luma_weight_op(dest_y, h->mb_linesize, height,
                       h->luma_weight[refn][list][0],
                       h->luma_weight[refn][list][1]);
        if (h->use_weight_chroma) {
            chroma_weight_op(dest_cb, h->mb_uvlinesize, chroma_height,
                             h->chroma_weight[refn][list][0][0],
                             h->chroma_weight[refn][list][0][1]);
            chroma_weight_op(dest_cr, h->mb_uvlinesize, chroma_height,
                             h->chroma_weight[refn][list][1][0],
                             h->chroma_weight[refn][list][1][1]);
        }
    }
}
1136 
/* NOTE(review): the first line of this function's signature appears to have
 * been lost in extraction; only the trailing parameters are visible. */
 int pixel_shift, int chroma_idc)
{
    /* fetch pixels for estimated mv 4 macroblocks ahead
     * optimized for 64byte cache lines */
    const int refn = h->ref_cache[list][scan8[0]];
    if (refn >= 0) {
        /* Estimated full-pel position of the macroblock 4 MBs ahead. */
        const int mx  = (h->mv_cache[list][scan8[0]][0] >> 2) + 16 * h->mb_x + 8;
        const int my  = (h->mv_cache[list][scan8[0]][1] >> 2) + 16 * h->mb_y;
        uint8_t **src = h->ref_list[list][refn].f.data;
        int off       = (mx << pixel_shift) +
                        (my + (h->mb_x & 3) * 4) * h->mb_linesize +
                        (64 << pixel_shift);
        h->vdsp.prefetch(src[0] + off, h->linesize, 4);
        if (chroma_idc == 3 /* yuv444 */) {
            /* Full-resolution chroma planes: reuse the luma offset. */
            h->vdsp.prefetch(src[1] + off, h->linesize, 4);
            h->vdsp.prefetch(src[2] + off, h->linesize, 4);
        } else {
            /* Subsampled chroma: halve coordinates and prefetch both
             * chroma planes in one call via the inter-plane stride. */
            off = ((mx >> 1) << pixel_shift) +
                  ((my >> 1) + (h->mb_x & 7)) * h->uvlinesize +
                  (64 << pixel_shift);
            h->vdsp.prefetch(src[1] + off, src[2] - src[1], 2);
        }
    }
}
1162 
/**
 * Free the per-stream tables allocated by the alloc path and, optionally,
 * the DPB and per-thread bitstream (rbsp) buffers.
 *
 * @param free_rbsp nonzero: fully release the DPB and rbsp buffers;
 *                  zero: keep pictures but mark them for reallocation.
 *
 * NOTE(review): several av_freep() calls visible in the upstream file
 * (around original lines 1168-1169, 1175, 1182-1185) appear to have been
 * lost in extraction.
 */
static void free_tables(H264Context *h, int free_rbsp)
{
    int i;
    H264Context *hx;

    av_freep(&h->cbp_table);
    av_freep(&h->mvd_table[0]);
    av_freep(&h->mvd_table[1]);
    av_freep(&h->direct_table);
    av_freep(&h->non_zero_count);
    /* slice_table is an alias into slice_table_base; clear the alias only. */
    h->slice_table = NULL;
    av_freep(&h->list_counts);

    av_freep(&h->mb2b_xy);
    av_freep(&h->mb2br_xy);

    if (free_rbsp && h->DPB) {
        for (i = 0; i < MAX_PICTURE_COUNT; i++)
            unref_picture(h, &h->DPB[i]);
        av_freep(&h->DPB);
    } else if (h->DPB) {
        /* Keep the picture array; force per-picture reallocation later. */
        for (i = 0; i < MAX_PICTURE_COUNT; i++)
            h->DPB[i].needs_realloc = 1;
    }

    h->cur_pic_ptr = NULL;

    for (i = 0; i < MAX_THREADS; i++) {
        hx = h->thread_context[i];
        if (!hx)
            continue;
        av_freep(&hx->top_borders[1]);
        av_freep(&hx->top_borders[0]);
        av_freep(&hx->edge_emu_buffer);
        av_freep(&hx->dc_val_base);
        av_freep(&hx->me.scratchpad);
        av_freep(&hx->er.mb_index2xy);
        av_freep(&hx->er.er_temp_buffer);
        av_freep(&hx->er.mbintra_table);
        av_freep(&hx->er.mbskip_table);

        if (free_rbsp) {
            av_freep(&hx->rbsp_buffer[1]);
            av_freep(&hx->rbsp_buffer[0]);
            hx->rbsp_buffer_size[0] = 0;
            hx->rbsp_buffer_size[1] = 0;
        }
        /* thread_context[0] is the main context itself; never free it. */
        if (i)
            av_freep(&h->thread_context[i]);
    }
}
1224 
/* Build the 8x8 dequantization tables for all QP values.
 * NOTE(review): the signature line appears to have been lost in
 * extraction; the body matches init_dequant8_coeff_table(H264Context *h). */
{
    int i, j, q, x;
    /* Maximum QP grows by 6 for every extra bit of luma depth. */
    const int max_qp = 51 + 6 * (h->sps.bit_depth_luma - 8);

    for (i = 0; i < 6; i++) {
        h->dequant8_coeff[i] = h->dequant8_buffer[i];
        /* Share a buffer with an earlier matrix if the scaling lists match. */
        for (j = 0; j < i; j++)
            if (!memcmp(h->pps.scaling_matrix8[j], h->pps.scaling_matrix8[i],
                        64 * sizeof(uint8_t))) {
                h->dequant8_coeff[i] = h->dequant8_buffer[j];
                break;
            }
        if (j < i)
            continue;

        for (q = 0; q < max_qp + 1; q++) {
            int shift = div6[q];
            int idx = rem6[q];
            for (x = 0; x < 64; x++)
                /* (x >> 3) | ((x & 7) << 3) transposes the 8x8 index. */
                h->dequant8_coeff[i][q][(x >> 3) | ((x & 7) << 3)] =
                    ((uint32_t)dequant8_coeff_init[idx][dequant8_coeff_init_scan[((x >> 1) & 12) | (x & 3)]] *
                     h->pps.scaling_matrix8[i][x]) << shift;
        }
    }
}
1251 
/* Build the 4x4 dequantization tables for all QP values.
 * NOTE(review): the signature line appears to have been lost in
 * extraction; the body matches init_dequant4_coeff_table(H264Context *h). */
{
    int i, j, q, x;
    const int max_qp = 51 + 6 * (h->sps.bit_depth_luma - 8);
    for (i = 0; i < 6; i++) {
        h->dequant4_coeff[i] = h->dequant4_buffer[i];
        /* Share a buffer with an earlier matrix if the scaling lists match. */
        for (j = 0; j < i; j++)
            if (!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i],
                        16 * sizeof(uint8_t))) {
                h->dequant4_coeff[i] = h->dequant4_buffer[j];
                break;
            }
        if (j < i)
            continue;

        for (q = 0; q < max_qp + 1; q++) {
            int shift = div6[q] + 2;
            int idx = rem6[q];
            for (x = 0; x < 16; x++)
                /* (x >> 2) | ((x << 2) & 0xF) transposes the 4x4 index. */
                h->dequant4_coeff[i][q][(x >> 2) | ((x << 2) & 0xF)] =
                    ((uint32_t)dequant4_coeff_init[idx][(x & 1) + ((x >> 2) & 1)] *
                     h->pps.scaling_matrix4[i][x]) << shift;
        }
    }
}
1277 
/* Initialize all dequantization tables; in transform-bypass mode the
 * tables are overwritten with the neutral factor (1 << 6).
 * NOTE(review): the signature line and the calls to the 4x4/8x8 table
 * initializers (upstream lines ~1281/1283/1288) appear to have been lost
 * in extraction, leaving the two `if`s below without their bodies/guards. */
{
    int i, x;
    if (h->pps.transform_8x8_mode)
    if (h->sps.transform_bypass) {
        for (i = 0; i < 6; i++)
            for (x = 0; x < 16; x++)
                h->dequant4_coeff[i][0][x] = 1 << 6;
        for (i = 0; i < 6; i++)
            for (x = 0; x < 64; x++)
                h->dequant8_coeff[i][0][x] = 1 << 6;
    }
}
1294 
/* Allocate the per-stream lookup tables and (once) the DPB.
 * Returns 0 on success, AVERROR(ENOMEM) on allocation failure.
 * NOTE(review): the signature line and the first halves of several
 * FF_ALLOCZ_OR_GOTO() invocations were lost in extraction; only the
 * size/`fail` argument lines of those calls remain visible below. */
{
    const int big_mb_num = h->mb_stride * (h->mb_height + 1);
    /* Per-row tables are duplicated per decoding thread. */
    const int row_mb_num = h->mb_stride * 2 * h->avctx->thread_count;
    int x, y, i;

        row_mb_num * 8 * sizeof(uint8_t), fail)
        big_mb_num * 48 * sizeof(uint8_t), fail)
        (big_mb_num + h->mb_stride) * sizeof(*h->slice_table_base), fail)
        big_mb_num * sizeof(uint16_t), fail)
        big_mb_num * sizeof(uint8_t), fail)
    FF_ALLOCZ_OR_GOTO(h->avctx, h->mvd_table[0],
                      16 * row_mb_num * sizeof(uint8_t), fail);
    FF_ALLOCZ_OR_GOTO(h->avctx, h->mvd_table[1],
                      16 * row_mb_num * sizeof(uint8_t), fail);
        4 * big_mb_num * sizeof(uint8_t), fail);
        big_mb_num * sizeof(uint8_t), fail)

    /* -1 marks "no slice" so edge macroblocks are never matched. */
    memset(h->slice_table_base, -1,
           (big_mb_num + h->mb_stride) * sizeof(*h->slice_table_base));
    h->slice_table = h->slice_table_base + h->mb_stride * 2 + 1;

        big_mb_num * sizeof(uint32_t), fail);
        big_mb_num * sizeof(uint32_t), fail);
    /* Precompute macroblock-index -> motion-vector-index mappings. */
    for (y = 0; y < h->mb_height; y++)
        for (x = 0; x < h->mb_width; x++) {
            const int mb_xy = x + y * h->mb_stride;
            const int b_xy = 4 * x + 4 * y * h->b_stride;

            h->mb2b_xy[mb_xy] = b_xy;
            h->mb2br_xy[mb_xy] = 8 * (FMO ? mb_xy : (mb_xy % (2 * h->mb_stride)));
        }

    if (!h->dequant4_coeff[0])

    if (!h->DPB) {
        h->DPB = av_mallocz_array(MAX_PICTURE_COUNT, sizeof(*h->DPB));
        if (!h->DPB)
            return AVERROR(ENOMEM);
        for (i = 0; i < MAX_PICTURE_COUNT; i++)
            av_frame_unref(&h->DPB[i].f);
        av_frame_unref(&h->cur_pic.f);
    }

    return 0;

fail:
    free_tables(h, 1);
    return AVERROR(ENOMEM);
}
1355 
/* Share the main context's tables with a slice-thread context; per-row
 * tables get an offset of i rows, scratch buffers stay thread-private.
 * NOTE(review): one assignment line (upstream 1367) and the first half of
 * the trailing ff_h264_pred_init() call (upstream 1378) appear to have
 * been lost in extraction. */
static void clone_tables(H264Context *dst, H264Context *src, int i)
{
    dst->intra4x4_pred_mode = src->intra4x4_pred_mode + i * 8 * 2 * src->mb_stride;
    dst->non_zero_count = src->non_zero_count;
    dst->slice_table = src->slice_table;
    dst->cbp_table = src->cbp_table;
    dst->mb2b_xy = src->mb2b_xy;
    dst->mb2br_xy = src->mb2br_xy;
    dst->mvd_table[0] = src->mvd_table[0] + i * 8 * 2 * src->mb_stride;
    dst->mvd_table[1] = src->mvd_table[1] + i * 8 * 2 * src->mb_stride;
    dst->direct_table = src->direct_table;
    dst->list_counts = src->list_counts;
    dst->DPB = src->DPB;
    dst->cur_pic_ptr = src->cur_pic_ptr;
    dst->cur_pic = src->cur_pic;
    /* Scratch buffers are allocated lazily per thread, never shared. */
    dst->bipred_scratchpad = NULL;
    dst->edge_emu_buffer = NULL;
    dst->me.scratchpad = NULL;
    /* Trailing argument of the truncated ff_h264_pred_init() call: */
        src->sps.chroma_format_idc);
}
1381 
/* Initialize per-context state that ff_h264_alloc_tables() could not set
 * up, including the error-resilience (ER) context and its buffers.
 * Returns 0 on success, AVERROR(ENOMEM) on failure.
 * NOTE(review): the signature line, the top-border FF_ALLOCZ_OR_GOTO()
 * first halves, and the guard opening the ER section (matching the lone
 * closing brace near the end) were lost in extraction. */
{
    ERContext *er = &h->er;
    int mb_array_size = h->mb_height * h->mb_stride;
    int y_size = (2 * h->mb_width + 1) * (2 * h->mb_height + 1);
    int c_size = h->mb_stride * (h->mb_height + 1);
    int yc_size = y_size + 2 * c_size;
    int x, y, i;

        h->mb_width * 16 * 3 * sizeof(uint8_t) * 2, fail)
        h->mb_width * 16 * 3 * sizeof(uint8_t) * 2, fail)

    /* Mark the padding positions of the ref cache as unavailable. */
    h->ref_cache[0][scan8[5] + 1] =
    h->ref_cache[0][scan8[7] + 1] =
    h->ref_cache[0][scan8[13] + 1] =
    h->ref_cache[1][scan8[5] + 1] =
    h->ref_cache[1][scan8[7] + 1] =
    h->ref_cache[1][scan8[13] + 1] = PART_NOT_AVAILABLE;

    /* init ER */
    er->avctx = h->avctx;
    er->dsp = &h->dsp;
    er->opaque = h;
    er->quarter_sample = 1;

    er->mb_num = h->mb_num;
    er->mb_width = h->mb_width;
    er->mb_height = h->mb_height;
    er->mb_stride = h->mb_stride;
    er->b8_stride = h->mb_width * 2 + 1;

    FF_ALLOCZ_OR_GOTO(h->avctx, er->mb_index2xy, (h->mb_num + 1) * sizeof(int),
                      fail); // error resilience code looks cleaner with this
    for (y = 0; y < h->mb_height; y++)
        for (x = 0; x < h->mb_width; x++)
            er->mb_index2xy[x + y * h->mb_width] = x + y * h->mb_stride;

    /* Sentinel entry one past the last macroblock. */
    er->mb_index2xy[h->mb_height * h->mb_width] = (h->mb_height - 1) *
                                                  h->mb_stride + h->mb_width;

        mb_array_size * sizeof(uint8_t), fail);

    FF_ALLOC_OR_GOTO(h->avctx, er->mbintra_table, mb_array_size, fail);
    memset(er->mbintra_table, 1, mb_array_size);

    FF_ALLOCZ_OR_GOTO(h->avctx, er->mbskip_table, mb_array_size + 2, fail);

        fail);

    FF_ALLOCZ_OR_GOTO(h->avctx, h->dc_val_base, yc_size * sizeof(int16_t), fail);
    er->dc_val[0] = h->dc_val_base + h->mb_width * 2 + 2;
    er->dc_val[1] = h->dc_val_base + y_size + h->mb_stride + 1;
    er->dc_val[2] = er->dc_val[1] + c_size;
    /* 1024 is the neutral DC value for 8-bit content (128 << 3). */
    for (i = 0; i < yc_size; i++)
        h->dc_val_base[i] = 1024;
    }

    return 0;

fail:
    return AVERROR(ENOMEM); // free_tables will clean up for us
}
1454 
1455 static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size,
1456  int parse_extradata);
1457 
/* Parse codec extradata: either an avcC configuration record (first byte
 * == 1, per ISO/IEC 14496-15) containing length-prefixed SPS/PPS NALs, or
 * raw Annex B NAL units. Returns 0 on success or a negative AVERROR.
 * NOTE(review): the signature line appears to have been lost in
 * extraction. */
{
    AVCodecContext *avctx = h->avctx;
    int ret;

    if (avctx->extradata[0] == 1) {
        int i, cnt, nalsize;
        unsigned char *p = avctx->extradata;

        h->is_avc = 1;

        if (avctx->extradata_size < 7) {
            av_log(avctx, AV_LOG_ERROR,
                   "avcC %d too short\n", avctx->extradata_size);
            return AVERROR_INVALIDDATA;
        }
        /* sps and pps in the avcC always have length coded with 2 bytes,
         * so put a fake nal_length_size = 2 while parsing them */
        h->nal_length_size = 2;
        // Decode sps from avcC
        cnt = *(p + 5) & 0x1f; // Number of sps
        p += 6;
        for (i = 0; i < cnt; i++) {
            /* nalsize includes the 2-byte length prefix. */
            nalsize = AV_RB16(p) + 2;
            if (p - avctx->extradata + nalsize > avctx->extradata_size)
                return AVERROR_INVALIDDATA;
            ret = decode_nal_units(h, p, nalsize, 1);
            if (ret < 0) {
                av_log(avctx, AV_LOG_ERROR,
                       "Decoding sps %d from avcC failed\n", i);
                return ret;
            }
            p += nalsize;
        }
        // Decode pps from avcC
        cnt = *(p++); // Number of pps
        for (i = 0; i < cnt; i++) {
            nalsize = AV_RB16(p) + 2;
            if (p - avctx->extradata + nalsize > avctx->extradata_size)
                return AVERROR_INVALIDDATA;
            ret = decode_nal_units(h, p, nalsize, 1);
            if (ret < 0) {
                av_log(avctx, AV_LOG_ERROR,
                       "Decoding pps %d from avcC failed\n", i);
                return ret;
            }
            p += nalsize;
        }
        // Now store right nal length size, that will be used to parse all other nals
        h->nal_length_size = (avctx->extradata[4] & 0x03) + 1;
    } else {
        h->is_avc = 0;
        ret = decode_nal_units(h, avctx->extradata, avctx->extradata_size, 1);
        if (ret < 0)
            return ret;
    }
    return 0;
}
1516 
/* Decoder init callback: set 8-bit/4:2:0 defaults, initialize the DSP
 * helpers, and parse any extradata. Returns 0 or a negative AVERROR.
 * NOTE(review): the signature line and several statements (at the blank
 * gaps below) were lost in extraction; `if (!avctx->has_b_frames)` and the
 * dangling `h->low_delay = 0; }` near the end belong to truncated
 * conditionals. */
{
    H264Context *h = avctx->priv_data;
    int i;
    int ret;

    h->avctx = avctx;

    /* Defaults until an SPS says otherwise: 8-bit, 4:2:0. */
    h->bit_depth_luma = 8;
    h->chroma_format_idc = 1;

    ff_h264dsp_init(&h->h264dsp, 8, 1);
    ff_h264qpel_init(&h->h264qpel, 8);
    ff_h264_pred_init(&h->hpc, h->avctx->codec_id, 8, 1);

    h->dequant_coeff_pps = -1;

    /* needed so that IDCT permutation is known early */
    ff_dsputil_init(&h->dsp, h->avctx);
    ff_videodsp_init(&h->vdsp, 8);

    /* Flat (all-16) scaling matrices until a PPS provides real ones. */
    memset(h->pps.scaling_matrix4, 16, 6 * 16 * sizeof(uint8_t));
    memset(h->pps.scaling_matrix8, 16, 2 * 64 * sizeof(uint8_t));

    h->slice_context_count = 1;
    h->workaround_bugs = avctx->workaround_bugs;
    h->flags = avctx->flags;

    /* set defaults */
    // s->decode_mb = ff_h263_decode_mb;
    if (!avctx->has_b_frames)
        h->low_delay = 1;

    h->pixel_shift = 0;
    h->sps.bit_depth_luma = avctx->bits_per_raw_sample = 8;

    h->thread_context[0] = h;
    h->outputed_poc = h->next_outputed_poc = INT_MIN;
    for (i = 0; i < MAX_DELAYED_PIC_COUNT; i++)
        h->last_pocs[i] = INT_MIN;
    h->prev_poc_msb = 1 << 16;
    h->x264_build = -1;
    ff_h264_reset_sei(h);
    h->recovery_frame = -1;
    h->frame_recovered = 0;
    if (avctx->codec_id == AV_CODEC_ID_H264) {
        /* H.264 timestamps are per-field; double the time base once. */
        if (avctx->ticks_per_frame == 1)
            h->avctx->time_base.den *= 2;
        avctx->ticks_per_frame = 2;
    }

    if (avctx->extradata_size > 0 && avctx->extradata) {
        ret = ff_h264_decode_extradata(h);
        if (ret < 0)
            return ret;
    }

        h->low_delay = 0;
    }

    avctx->internal->allocate_progress = 1;

    return 0;
}
1593 
/* True if pointer a lies inside the object of the given size at b. */
#define IN_RANGE(a, b, size) (((a) >= (b)) && ((a) < ((b) + (size))))
#undef REBASE_PICTURE
/* Translate a Picture pointer from old_ctx's DPB to the corresponding
 * slot in new_ctx's DPB; anything outside the old DPB maps to NULL. */
#define REBASE_PICTURE(pic, new_ctx, old_ctx) \
 ((pic && pic >= old_ctx->DPB && \
 pic < old_ctx->DPB + MAX_PICTURE_COUNT) ? \
 &new_ctx->DPB[pic - old_ctx->DPB] : NULL)
1600 
1601 static void copy_picture_range(Picture **to, Picture **from, int count,
1602  H264Context *new_base,
1603  H264Context *old_base)
1604 {
1605  int i;
1606 
1607  for (i = 0; i < count; i++) {
1608  assert((IN_RANGE(from[i], old_base, sizeof(*old_base)) ||
1609  IN_RANGE(from[i], old_base->DPB,
1610  sizeof(Picture) * MAX_PICTURE_COUNT) ||
1611  !from[i]));
1612  to[i] = REBASE_PICTURE(from[i], new_base, old_base);
1613  }
1614 }
1615 
1616 static int copy_parameter_set(void **to, void **from, int count, int size)
1617 {
1618  int i;
1619 
1620  for (i = 0; i < count; i++) {
1621  if (to[i] && !from[i]) {
1622  av_freep(&to[i]);
1623  } else if (from[i] && !to[i]) {
1624  to[i] = av_malloc(size);
1625  if (!to[i])
1626  return AVERROR(ENOMEM);
1627  }
1628 
1629  if (from[i])
1630  memcpy(to[i], from[i], size);
1631  }
1632 
1633  return 0;
1634 }
1635 
/* Frame-thread copy-init callback: reset the per-thread pointers that
 * must not be shared with the context this thread was copied from.
 * NOTE(review): the signature line appears to have been lost in
 * extraction. */
{
    H264Context *h = avctx->priv_data;

    /* Only worker copies need the reset; the original context keeps its state. */
    if (!avctx->internal->is_copy)
        return 0;
    memset(h->sps_buffers, 0, sizeof(h->sps_buffers));
    memset(h->pps_buffers, 0, sizeof(h->pps_buffers));

    h->avctx = avctx;
    h->rbsp_buffer[0] = NULL;
    h->rbsp_buffer[1] = NULL;
    h->rbsp_buffer_size[0] = 0;
    h->rbsp_buffer_size[1] = 0;
    h->context_initialized = 0;

    return 0;
}
1654 
/* Bulk-copy the struct member range [start_field, end_field) between two
 * contexts; end_field itself is NOT copied. Depends on the declaration
 * order of the fields inside the struct. */
#define copy_fields(to, from, start_field, end_field) \
 memcpy(&to->start_field, &from->start_field, \
 (char *)&to->end_field - (char *)&to->start_field)
1658 
1659 static int h264_slice_header_init(H264Context *, int);
1660 
1662 
/* Frame-threading state transfer: copy decoder state from the context of
 * the previously decoded frame (src/h1) into this thread's context
 * (dst/h), reinitializing tables if stream geometry changed.
 * NOTE(review): the first signature line and several statements (at the
 * blank gaps below, e.g. after `if (context_reinitialized)` and inside
 * `if (!h->droppable)`) were lost in extraction. */
 const AVCodecContext *src)
{
    H264Context *h = dst->priv_data, *h1 = src->priv_data;
    int inited = h->context_initialized, err = 0;
    int context_reinitialized = 0;
    int i, ret;

    if (dst == src || !h1->context_initialized)
        return 0;

    /* Any geometry or format change forces a slice-header reinit. */
    if (inited &&
        (h->width != h1->width ||
         h->height != h1->height ||
         h->mb_width != h1->mb_width ||
         h->mb_height != h1->mb_height ||
         h->sps.bit_depth_luma != h1->sps.bit_depth_luma ||
         h->sps.chroma_format_idc != h1->sps.chroma_format_idc ||
         h->sps.colorspace != h1->sps.colorspace)) {

        /* set bits_per_raw_sample to the previous value. the check for changed
         * bit depth in h264_set_parameter_from_sps() uses it and sets it to
         * the current value */

        h->width = h1->width;
        h->height = h1->height;
        h->mb_height = h1->mb_height;
        h->mb_width = h1->mb_width;
        h->mb_num = h1->mb_num;
        h->mb_stride = h1->mb_stride;
        h->b_stride = h1->b_stride;

        if ((err = h264_slice_header_init(h, 1)) < 0) {
            av_log(h->avctx, AV_LOG_ERROR, "h264_slice_header_init() failed");
            return err;
        }
        context_reinitialized = 1;

        /* update linesize on resize. The decoder doesn't
         * necessarily call h264_frame_start in the new thread */
        h->linesize = h1->linesize;
        h->uvlinesize = h1->uvlinesize;

        /* copy block_offset since frame_start may not be called */
        memcpy(h->block_offset, h1->block_offset, sizeof(h->block_offset));
    }

    if (!inited) {
        /* First-time init: clone the whole source context, then reset
         * everything that must stay thread-local. */
        for (i = 0; i < MAX_SPS_COUNT; i++)
            av_freep(h->sps_buffers + i);

        for (i = 0; i < MAX_PPS_COUNT; i++)
            av_freep(h->pps_buffers + i);

        memcpy(h, h1, sizeof(*h1));
        memset(h->sps_buffers, 0, sizeof(h->sps_buffers));
        memset(h->pps_buffers, 0, sizeof(h->pps_buffers));
        memset(&h->er, 0, sizeof(h->er));
        memset(&h->me, 0, sizeof(h->me));
        memset(&h->mb, 0, sizeof(h->mb));
        memset(&h->mb_luma_dc, 0, sizeof(h->mb_luma_dc));
        memset(&h->mb_padding, 0, sizeof(h->mb_padding));
        h->context_initialized = 0;

        memset(&h->cur_pic, 0, sizeof(h->cur_pic));
        av_frame_unref(&h->cur_pic.f);
        h->cur_pic.tf.f = &h->cur_pic.f;

        h->avctx = dst;
        h->DPB = NULL;
        h->qscale_table_pool = NULL;
        h->mb_type_pool = NULL;
        h->ref_index_pool = NULL;
        h->motion_val_pool = NULL;

        ret = ff_h264_alloc_tables(h);
        if (ret < 0) {
            av_log(dst, AV_LOG_ERROR, "Could not allocate memory\n");
            return ret;
        }
        ret = context_init(h);
        if (ret < 0) {
            av_log(dst, AV_LOG_ERROR, "context_init() failed.\n");
            return ret;
        }

        for (i = 0; i < 2; i++) {
            h->rbsp_buffer[i] = NULL;
            h->rbsp_buffer_size[i] = 0;
        }
        h->bipred_scratchpad = NULL;
        h->edge_emu_buffer = NULL;

        h->thread_context[0] = h;

        h->context_initialized = 1;
    }

    h->avctx->coded_height = h1->avctx->coded_height;
    h->avctx->coded_width = h1->avctx->coded_width;
    h->avctx->width = h1->avctx->width;
    h->avctx->height = h1->avctx->height;
    h->coded_picture_number = h1->coded_picture_number;
    h->first_field = h1->first_field;
    h->picture_structure = h1->picture_structure;
    h->qscale = h1->qscale;
    h->droppable = h1->droppable;
    h->low_delay = h1->low_delay;

    /* Re-reference the whole DPB from the source context. */
    for (i = 0; i < MAX_PICTURE_COUNT; i++) {
        unref_picture(h, &h->DPB[i]);
        if (h1->DPB[i].f.buf[0] &&
            (ret = ref_picture(h, &h->DPB[i], &h1->DPB[i])) < 0)
            return ret;
    }

    h->cur_pic_ptr = REBASE_PICTURE(h1->cur_pic_ptr, h, h1);
    unref_picture(h, &h->cur_pic);
    if (h1->cur_pic.f.buf[0]) {
        ret = ref_picture(h, &h->cur_pic, &h1->cur_pic);
        if (ret < 0)
            return ret;
    }

    h->workaround_bugs = h1->workaround_bugs;
    h->low_delay = h1->low_delay;
    h->droppable = h1->droppable;

    /* frame_start may not be called for the next thread (if it's decoding
     * a bottom field) so this has to be allocated here */
    err = alloc_scratch_buffers(h, h1->linesize);
    if (err < 0)
        return err;

    // extradata/NAL handling
    h->is_avc = h1->is_avc;

    // SPS/PPS
    if ((ret = copy_parameter_set((void **)h->sps_buffers,
                                  (void **)h1->sps_buffers,
                                  MAX_SPS_COUNT, sizeof(SPS))) < 0)
        return ret;
    h->sps = h1->sps;
    if ((ret = copy_parameter_set((void **)h->pps_buffers,
                                  (void **)h1->pps_buffers,
                                  MAX_PPS_COUNT, sizeof(PPS))) < 0)
        return ret;
    h->pps = h1->pps;

    // Dequantization matrices
    // FIXME these are big - can they be only copied when PPS changes?
    copy_fields(h, h1, dequant4_buffer, dequant4_coeff);

    /* Rebase the dequant pointers onto this context's own buffers. */
    for (i = 0; i < 6; i++)
        h->dequant4_coeff[i] = h->dequant4_buffer[0] +
                               (h1->dequant4_coeff[i] - h1->dequant4_buffer[0]);

    for (i = 0; i < 6; i++)
        h->dequant8_coeff[i] = h->dequant8_buffer[0] +
                               (h1->dequant8_coeff[i] - h1->dequant8_buffer[0]);

    h->dequant_coeff_pps = h1->dequant_coeff_pps;

    // POC timing
    copy_fields(h, h1, poc_lsb, redundant_pic_count);

    // reference lists
    copy_fields(h, h1, short_ref, cabac_init_idc);

    copy_picture_range(h->short_ref, h1->short_ref, 32, h, h1);
    copy_picture_range(h->long_ref, h1->long_ref, 32, h, h1);
    copy_picture_range(h->delayed_pic, h1->delayed_pic,
                       MAX_DELAYED_PIC_COUNT + 2, h, h1);

    h->last_slice_type = h1->last_slice_type;

    if (context_reinitialized)

    if (!h->cur_pic_ptr)
        return 0;

    if (!h->droppable) {
        h->prev_poc_msb = h->poc_msb;
        h->prev_poc_lsb = h->poc_lsb;
    }
    h->prev_frame_num = h->frame_num;

    h->recovery_frame = h1->recovery_frame;
    h->frame_recovered = h1->frame_recovered;

    return err;
}
1862 
1864 {
1865  Picture *pic;
1866  int i, ret;
1867  const int pixel_shift = h->pixel_shift;
1868 
1870  h->cur_pic_ptr = NULL;
1871 
1872  i = find_unused_picture(h);
1873  if (i < 0) {
1874  av_log(h->avctx, AV_LOG_ERROR, "no frame buffer available\n");
1875  return i;
1876  }
1877  pic = &h->DPB[i];
1878 
1879  pic->reference = h->droppable ? 0 : h->picture_structure;
1882  /*
1883  * Zero key_frame here; IDR markings per slice in frame or fields are ORed
1884  * in later.
1885  * See decode_nal_units().
1886  */
1887  pic->f.key_frame = 0;
1888  pic->mmco_reset = 0;
1889  pic->recovered = 0;
1890 
1891  if ((ret = alloc_picture(h, pic)) < 0)
1892  return ret;
1893 
1894  h->cur_pic_ptr = pic;
1895  unref_picture(h, &h->cur_pic);
1896  if ((ret = ref_picture(h, &h->cur_pic, h->cur_pic_ptr)) < 0)
1897  return ret;
1898 
1900  ff_er_frame_start(&h->er);
1901 
1902  assert(h->linesize && h->uvlinesize);
1903 
1904  for (i = 0; i < 16; i++) {
1905  h->block_offset[i] = (4 * ((scan8[i] - scan8[0]) & 7) << pixel_shift) + 4 * h->linesize * ((scan8[i] - scan8[0]) >> 3);
1906  h->block_offset[48 + i] = (4 * ((scan8[i] - scan8[0]) & 7) << pixel_shift) + 8 * h->linesize * ((scan8[i] - scan8[0]) >> 3);
1907  }
1908  for (i = 0; i < 16; i++) {
1909  h->block_offset[16 + i] =
1910  h->block_offset[32 + i] = (4 * ((scan8[i] - scan8[0]) & 7) << pixel_shift) + 4 * h->uvlinesize * ((scan8[i] - scan8[0]) >> 3);
1911  h->block_offset[48 + 16 + i] =
1912  h->block_offset[48 + 32 + i] = (4 * ((scan8[i] - scan8[0]) & 7) << pixel_shift) + 8 * h->uvlinesize * ((scan8[i] - scan8[0]) >> 3);
1913  }
1914 
1915  /* can't be in alloc_tables because linesize isn't known there.
1916  * FIXME: redo bipred weight to not require extra buffer? */
1917  for (i = 0; i < h->slice_context_count; i++)
1918  if (h->thread_context[i]) {
1920  if (ret < 0)
1921  return ret;
1922  }
1923 
1924  /* Some macroblocks can be accessed before they're available in case
1925  * of lost slices, MBAFF or threading. */
1926  memset(h->slice_table, -1,
1927  (h->mb_height * h->mb_stride - 1) * sizeof(*h->slice_table));
1928 
1929  // s->decode = (s->flags & CODEC_FLAG_PSNR) || !s->encoding ||
1930  // s->current_picture.f.reference /* || h->contains_intra */ || 1;
1931 
1932  /* We mark the current picture as non-reference after allocating it, so
1933  * that if we break out due to an error it can be released automatically
1934  * in the next ff_MPV_frame_start().
1935  */
1936  h->cur_pic_ptr->reference = 0;
1937 
1938  h->cur_pic_ptr->field_poc[0] = h->cur_pic_ptr->field_poc[1] = INT_MAX;
1939 
1940  h->next_output_pic = NULL;
1941 
1942  assert(h->cur_pic_ptr->long_ref == 0);
1943 
1944  return 0;
1945 }
1946 
/* Run after decoding a frame but possibly before all its slices are done:
 * derive interlacing/repeat flags (SEI first, then the coding mode),
 * attach stereo3d side data, and reorder delayed pictures by POC to pick
 * the next picture to output.
 * NOTE(review): several `case SEI_PIC_STRUCT_*:` labels, condition lines
 * and single statements were lost in extraction (the blank gaps below);
 * the switch bodies and some if/else arms are therefore incomplete here. */
static void decode_postinit(H264Context *h, int setup_finished)
{
    Picture *out = h->cur_pic_ptr;
    Picture *cur = h->cur_pic_ptr;
    int i, pics, out_of_order, out_idx;
    int invalid = 0, cnt = 0;

    h->cur_pic_ptr->f.pict_type = h->pict_type;

    if (h->next_output_pic)
        return;

    if (cur->field_poc[0] == INT_MAX || cur->field_poc[1] == INT_MAX) {
        /* FIXME: if we have two PAFF fields in one packet, we can't start
         * the next thread here. If we have one field per packet, we can.
         * The check in decode_nal_units() is not good enough to find this
         * yet, so we assume the worst for now. */
        // if (setup_finished)
        //    ff_thread_finish_setup(h->avctx);
        return;
    }

    cur->f.interlaced_frame = 0;
    cur->f.repeat_pict = 0;

    /* Signal interlacing information externally. */
    /* Prioritize picture timing SEI information over used
     * decoding process if it exists. */

    if (h->sps.pic_struct_present_flag) {
        switch (h->sei_pic_struct) {
        case SEI_PIC_STRUCT_FRAME:
            break;
            cur->f.interlaced_frame = 1;
            break;
            if (FIELD_OR_MBAFF_PICTURE(h))
                cur->f.interlaced_frame = 1;
            else
                // try to flag soft telecine progressive
            break;
            /* Signal the possibility of telecined film externally
             * (pic_struct 5,6). From these hints, let the applications
             * decide if they apply deinterlacing. */
            cur->f.repeat_pict = 1;
            break;
            cur->f.repeat_pict = 2;
            break;
            cur->f.repeat_pict = 4;
            break;
        }

        if ((h->sei_ct_type & 3) &&
            cur->f.interlaced_frame = (h->sei_ct_type & (1 << 1)) != 0;
    } else {
        /* Derive interlacing flag from used decoding process. */
    }

    if (cur->field_poc[0] != cur->field_poc[1]) {
        /* Derive top_field_first from field pocs. */
        cur->f.top_field_first = cur->field_poc[0] < cur->field_poc[1];
    } else {
        if (cur->f.interlaced_frame || h->sps.pic_struct_present_flag) {
            /* Use picture timing SEI information. Even if it is
             * information of a past frame, better than nothing. */
                cur->f.top_field_first = 1;
            else
                cur->f.top_field_first = 0;
        } else {
            /* Most likely progressive */
            cur->f.top_field_first = 0;
        }
    }

    /* Export frame-packing SEI (H.264 spec D.2.25) as stereo3d side data. */
    if (h->sei_frame_packing_present &&
        h->content_interpretation_type > 0 &&
        h->content_interpretation_type < 3) {
        AVStereo3D *stereo = av_stereo3d_create_side_data(&cur->f);
        if (!stereo)
            return;

        switch (h->frame_packing_arrangement_type) {
        case 0:
            stereo->type = AV_STEREO3D_CHECKERBOARD;
            break;
        case 1:
            stereo->type = AV_STEREO3D_COLUMNS;
            break;
        case 2:
            stereo->type = AV_STEREO3D_LINES;
            break;
        case 3:
            if (h->quincunx_subsampling)
            else
                stereo->type = AV_STEREO3D_SIDEBYSIDE;
            break;
        case 4:
            stereo->type = AV_STEREO3D_TOPBOTTOM;
            break;
        case 5:
            stereo->type = AV_STEREO3D_FRAMESEQUENCE;
            break;
        case 6:
            stereo->type = AV_STEREO3D_2D;
            break;
        }

        if (h->content_interpretation_type == 2)
            stereo->flags = AV_STEREO3D_FLAG_INVERT;
    }

    // FIXME do something with unavailable reference frames

    /* Sort B-frames into display order */

        h->low_delay = 0;
    }

        h->low_delay = 0;
    }

    pics = 0;
    while (h->delayed_pic[pics])
        pics++;

    assert(pics <= MAX_DELAYED_PIC_COUNT);

    h->delayed_pic[pics++] = cur;
    if (cur->reference == 0)
        cur->reference = DELAYED_PIC_REF;

    /* Frame reordering. This code takes pictures from coding order and sorts
     * them by their incremental POC value into display order. It supports POC
     * gaps, MMCO reset codes and random resets.
     * A "display group" can start either with a IDR frame (f.key_frame = 1),
     * and/or can be closed down with a MMCO reset code. In sequences where
     * there is no delay, we can't detect that (since the frame was already
     * output to the user), so we also set h->mmco_reset to detect the MMCO
     * reset code.
     * FIXME: if we detect insufficient delays (as per h->avctx->has_b_frames),
     * we increase the delay between input and output. All frames affected by
     * the lag (e.g. those that should have been output before another frame
     * that we already returned to the user) will be dropped. This is a bug
     * that we will fix later. */
    for (i = 0; i < MAX_DELAYED_PIC_COUNT; i++) {
        cnt += out->poc < h->last_pocs[i];
        invalid += out->poc == INT_MIN;
    }
    if (!h->mmco_reset && !cur->f.key_frame &&
        cnt + invalid == MAX_DELAYED_PIC_COUNT && cnt > 0) {
        h->mmco_reset = 2;
        if (pics > 1)
            h->delayed_pic[pics - 2]->mmco_reset = 2;
    }
    if (h->mmco_reset || cur->f.key_frame) {
        for (i = 0; i < MAX_DELAYED_PIC_COUNT; i++)
            h->last_pocs[i] = INT_MIN;
        cnt = 0;
        invalid = MAX_DELAYED_PIC_COUNT;
    }
    /* Pick the lowest-POC delayed picture up to the next group boundary. */
    out = h->delayed_pic[0];
    out_idx = 0;
    for (i = 1; i < MAX_DELAYED_PIC_COUNT &&
                h->delayed_pic[i] &&
                !h->delayed_pic[i - 1]->mmco_reset &&
                !h->delayed_pic[i]->f.key_frame;
         i++)
        if (h->delayed_pic[i]->poc < out->poc) {
            out = h->delayed_pic[i];
            out_idx = i;
        }
    if (h->avctx->has_b_frames == 0 &&
        (h->delayed_pic[0]->f.key_frame || h->mmco_reset))
        h->next_outputed_poc = INT_MIN;
    out_of_order = !out->f.key_frame && !h->mmco_reset &&
                   (out->poc < h->next_outputed_poc);

    } else if (out_of_order && pics - 1 == h->avctx->has_b_frames &&
               h->avctx->has_b_frames < MAX_DELAYED_PIC_COUNT) {
        if (invalid + cnt < MAX_DELAYED_PIC_COUNT) {
            h->avctx->has_b_frames = FFMAX(h->avctx->has_b_frames, cnt);
        }
        h->low_delay = 0;
    } else if (h->low_delay &&
               ((h->next_outputed_poc != INT_MIN &&
                 out->poc > h->next_outputed_poc + 2) ||
                cur->f.pict_type == AV_PICTURE_TYPE_B)) {
        /* Reordering detected in a supposedly low-delay stream: grow the
         * output delay. */
        h->low_delay = 0;
        h->avctx->has_b_frames++;
    }

    if (pics > h->avctx->has_b_frames) {
        out->reference &= ~DELAYED_PIC_REF;
        // for frame threading, the owner must be the second field's thread or
        // else the first thread can release the picture and reuse it unsafely
        for (i = out_idx; h->delayed_pic[i]; i++)
            h->delayed_pic[i] = h->delayed_pic[i + 1];
    }
    memmove(h->last_pocs, &h->last_pocs[1],
            sizeof(*h->last_pocs) * (MAX_DELAYED_PIC_COUNT - 1));
    h->last_pocs[MAX_DELAYED_PIC_COUNT - 1] = cur->poc;
    if (!out_of_order && pics > h->avctx->has_b_frames) {
        h->next_output_pic = out;
        if (out->mmco_reset) {
            if (out_idx > 0) {
                h->next_outputed_poc = out->poc;
                h->delayed_pic[out_idx - 1]->mmco_reset = out->mmco_reset;
            } else {
                h->next_outputed_poc = INT_MIN;
            }
        } else {
            if (out_idx == 0 && pics > 1 && h->delayed_pic[0]->f.key_frame) {
                h->next_outputed_poc = INT_MIN;
            } else {
                h->next_outputed_poc = out->poc;
            }
        }
        h->mmco_reset = 0;
    } else {
        av_log(h->avctx, AV_LOG_DEBUG, "no picture\n");
    }

    if (h->next_output_pic) {
        if (h->next_output_pic->recovered) {
            // We have reached a recovery point and all frames after it in
            // display order are "recovered".
        }
    }

    if (setup_finished && !h->avctx->hwaccel)
}
2213 
/* NOTE(review): the opening line of this definition (its name and first
 * parameter) was elided by the documentation extraction — presumably
 * backup_mb_border(H264Context *h, uint8_t *src_y, ...); confirm against
 * the original file. From the body: it saves the bottom row(s) of the
 * just-decoded macroblock into h->top_borders[] so they remain available
 * as the "row above" for the next macroblock row. */
2215  uint8_t *src_cb, uint8_t *src_cr,
2216  int linesize, int uvlinesize,
2217  int simple)
2218 {
2219  uint8_t *top_border;
2220  int top_idx = 1;
2221  const int pixel_shift = h->pixel_shift;
2222  int chroma444 = CHROMA444(h);
2223  int chroma422 = CHROMA422(h);
2224 
/* Step back one line so the copies below read the row above the
 * current position. */
2225  src_y -= linesize;
2226  src_cb -= uvlinesize;
2227  src_cr -= uvlinesize;
2228 
/* MBAFF frames: for the bottom MB of a non-MBAFF pair (odd mb_y) the
 * border of the *top* MB (index 0) is saved here; otherwise fall through
 * to the common path with top_idx selecting the proper slot. */
2229  if (!simple && FRAME_MBAFF(h)) {
2230  if (h->mb_y & 1) {
2231  if (!MB_MBAFF(h)) {
2232  top_border = h->top_borders[0][h->mb_x];
2233  AV_COPY128(top_border, src_y + 15 * linesize);
2234  if (pixel_shift)
2235  AV_COPY128(top_border + 16, src_y + 15 * linesize + 16);
/* Chroma is skipped entirely in gray-only decoding mode. */
2236  if (simple || !CONFIG_GRAY || !(h->flags & CODEC_FLAG_GRAY)) {
2237  if (chroma444) {
2238  if (pixel_shift) {
2239  AV_COPY128(top_border + 32, src_cb + 15 * uvlinesize);
2240  AV_COPY128(top_border + 48, src_cb + 15 * uvlinesize + 16);
2241  AV_COPY128(top_border + 64, src_cr + 15 * uvlinesize);
2242  AV_COPY128(top_border + 80, src_cr + 15 * uvlinesize + 16);
2243  } else {
2244  AV_COPY128(top_border + 16, src_cb + 15 * uvlinesize);
2245  AV_COPY128(top_border + 32, src_cr + 15 * uvlinesize);
2246  }
2247  } else if (chroma422) {
2248  if (pixel_shift) {
2249  AV_COPY128(top_border + 32, src_cb + 15 * uvlinesize);
2250  AV_COPY128(top_border + 48, src_cr + 15 * uvlinesize);
2251  } else {
2252  AV_COPY64(top_border + 16, src_cb + 15 * uvlinesize);
2253  AV_COPY64(top_border + 24, src_cr + 15 * uvlinesize);
2254  }
2255  } else {
/* 4:2:0: chroma planes are only 8 lines tall, hence row 7. */
2256  if (pixel_shift) {
2257  AV_COPY128(top_border + 32, src_cb + 7 * uvlinesize);
2258  AV_COPY128(top_border + 48, src_cr + 7 * uvlinesize);
2259  } else {
2260  AV_COPY64(top_border + 16, src_cb + 7 * uvlinesize);
2261  AV_COPY64(top_border + 24, src_cr + 7 * uvlinesize);
2262  }
2263  }
2264  }
2265  }
2266  } else if (MB_MBAFF(h)) {
2267  top_idx = 0;
2268  } else
2269  return;
2270  }
2271 
2272  top_border = h->top_borders[top_idx][h->mb_x];
2273  /* There are two lines saved, the line above the top macroblock
2274  * of a pair, and the line above the bottom macroblock. */
2275  AV_COPY128(top_border, src_y + 16 * linesize);
2276  if (pixel_shift)
2277  AV_COPY128(top_border + 16, src_y + 16 * linesize + 16);
2278 
2279  if (simple || !CONFIG_GRAY || !(h->flags & CODEC_FLAG_GRAY)) {
2280  if (chroma444) {
2281  if (pixel_shift) {
2282  AV_COPY128(top_border + 32, src_cb + 16 * linesize);
2283  AV_COPY128(top_border + 48, src_cb + 16 * linesize + 16);
2284  AV_COPY128(top_border + 64, src_cr + 16 * linesize);
2285  AV_COPY128(top_border + 80, src_cr + 16 * linesize + 16);
2286  } else {
2287  AV_COPY128(top_border + 16, src_cb + 16 * linesize);
2288  AV_COPY128(top_border + 32, src_cr + 16 * linesize);
2289  }
2290  } else if (chroma422) {
2291  if (pixel_shift) {
2292  AV_COPY128(top_border + 32, src_cb + 16 * uvlinesize);
2293  AV_COPY128(top_border + 48, src_cr + 16 * uvlinesize);
2294  } else {
2295  AV_COPY64(top_border + 16, src_cb + 16 * uvlinesize);
2296  AV_COPY64(top_border + 24, src_cr + 16 * uvlinesize);
2297  }
2298  } else {
2299  if (pixel_shift) {
2300  AV_COPY128(top_border + 32, src_cb + 8 * uvlinesize);
2301  AV_COPY128(top_border + 48, src_cr + 8 * uvlinesize);
2302  } else {
2303  AV_COPY64(top_border + 16, src_cb + 8 * uvlinesize);
2304  AV_COPY64(top_border + 24, src_cr + 8 * uvlinesize);
2305  }
2306  }
2307  }
2308 }
2309 
/* NOTE(review): the opening line of this definition (its name and first
 * parameter) was elided by the documentation extraction — presumably
 * xchg_mb_border(H264Context *h, uint8_t *src_y, ...); confirm against
 * the original file. From the body: depending on 'xchg' it either copies
 * or swaps the saved top-border pixels with the current macroblock edge,
 * so deblocking can see the unfiltered neighbors. */
2311  uint8_t *src_cb, uint8_t *src_cr,
2312  int linesize, int uvlinesize,
2313  int xchg, int chroma444,
2314  int simple, int pixel_shift)
2315 {
2316  int deblock_topleft;
2317  int deblock_top;
2318  int top_idx = 1;
2319  uint8_t *top_border_m1;
2320  uint8_t *top_border;
2321 
/* MBAFF: the bottom MB of a non-MBAFF pair has nothing to exchange. */
2322  if (!simple && FRAME_MBAFF(h)) {
2323  if (h->mb_y & 1) {
2324  if (!MB_MBAFF(h))
2325  return;
2326  } else {
2327  top_idx = MB_MBAFF(h) ? 0 : 1;
2328  }
2329  }
2330 
/* deblocking_filter == 2 means "filter only within the current slice",
 * so the neighbor checks use the slice table instead of plain bounds. */
2331  if (h->deblocking_filter == 2) {
2332  deblock_topleft = h->slice_table[h->mb_xy - 1 - h->mb_stride] == h->slice_num;
2333  deblock_top = h->top_type;
2334  } else {
2335  deblock_topleft = (h->mb_x > 0);
2336  deblock_top = (h->mb_y > !!MB_FIELD(h));
2337  }
2338 
2339  src_y -= linesize + 1 + pixel_shift;
2340  src_cb -= uvlinesize + 1 + pixel_shift;
2341  src_cr -= uvlinesize + 1 + pixel_shift;
2342 
2343  top_border_m1 = h->top_borders[top_idx][h->mb_x - 1];
2344  top_border = h->top_borders[top_idx][h->mb_x];
2345 
/* Copy (xchg == 0) or swap (xchg != 0) 8/16 bytes between border buffer
 * and picture; widths double when pixel_shift (high bit depth) is set.
 * NOTE(review): multi-statement macro without do{}while(0) — safe at the
 * existing call sites, but fragile if used after a bare if(). */
2346 #define XCHG(a, b, xchg) \
2347  if (pixel_shift) { \
2348  if (xchg) { \
2349  AV_SWAP64(b + 0, a + 0); \
2350  AV_SWAP64(b + 8, a + 8); \
2351  } else { \
2352  AV_COPY128(b, a); \
2353  } \
2354  } else if (xchg) \
2355  AV_SWAP64(b, a); \
2356  else \
2357  AV_COPY64(b, a);
2358 
2359  if (deblock_top) {
2360  if (deblock_topleft) {
2361  XCHG(top_border_m1 + (8 << pixel_shift),
2362  src_y - (7 << pixel_shift), 1);
2363  }
2364  XCHG(top_border + (0 << pixel_shift), src_y + (1 << pixel_shift), xchg);
2365  XCHG(top_border + (8 << pixel_shift), src_y + (9 << pixel_shift), 1);
2366  if (h->mb_x + 1 < h->mb_width) {
2367  XCHG(h->top_borders[top_idx][h->mb_x + 1],
2368  src_y + (17 << pixel_shift), 1);
2369  }
2370  }
2371  if (simple || !CONFIG_GRAY || !(h->flags & CODEC_FLAG_GRAY)) {
2372  if (chroma444) {
2373  if (deblock_top) {
2374  if (deblock_topleft) {
2375  XCHG(top_border_m1 + (24 << pixel_shift), src_cb - (7 << pixel_shift), 1);
2376  XCHG(top_border_m1 + (40 << pixel_shift), src_cr - (7 << pixel_shift), 1);
2377  }
2378  XCHG(top_border + (16 << pixel_shift), src_cb + (1 << pixel_shift), xchg);
2379  XCHG(top_border + (24 << pixel_shift), src_cb + (9 << pixel_shift), 1);
2380  XCHG(top_border + (32 << pixel_shift), src_cr + (1 << pixel_shift), xchg);
2381  XCHG(top_border + (40 << pixel_shift), src_cr + (9 << pixel_shift), 1);
2382  if (h->mb_x + 1 < h->mb_width) {
2383  XCHG(h->top_borders[top_idx][h->mb_x + 1] + (16 << pixel_shift), src_cb + (17 << pixel_shift), 1);
2384  XCHG(h->top_borders[top_idx][h->mb_x + 1] + (32 << pixel_shift), src_cr + (17 << pixel_shift), 1);
2385  }
2386  }
2387  } else {
2388  if (deblock_top) {
2389  if (deblock_topleft) {
2390  XCHG(top_border_m1 + (16 << pixel_shift), src_cb - (7 << pixel_shift), 1);
2391  XCHG(top_border_m1 + (24 << pixel_shift), src_cr - (7 << pixel_shift), 1);
2392  }
2393  XCHG(top_border + (16 << pixel_shift), src_cb + 1 + pixel_shift, 1);
2394  XCHG(top_border + (24 << pixel_shift), src_cr + 1 + pixel_shift, 1);
2395  }
2396  }
2397  }
2398 }
2399 
2400 static av_always_inline int dctcoef_get(int16_t *mb, int high_bit_depth,
2401  int index)
2402 {
2403  if (high_bit_depth) {
2404  return AV_RN32A(((int32_t *)mb) + index);
2405  } else
2406  return AV_RN16A(mb + index);
2407 }
2408 
2409 static av_always_inline void dctcoef_set(int16_t *mb, int high_bit_depth,
2410  int index, int value)
2411 {
2412  if (high_bit_depth) {
2413  AV_WN32A(((int32_t *)mb) + index, value);
2414  } else
2415  AV_WN16A(mb + index, value);
2416 }
2417 
/* NOTE(review): the opening line of this definition (its name and first
 * parameters) and several interior lines (visible as gaps in the embedded
 * numbering, e.g. the idct_add assignments paired with the idct_dc_add
 * ones) were elided by the documentation extraction; confirm against the
 * original file. From the body: performs luma intra prediction and adds
 * the residual for one macroblock plane p (0 = luma, 1/2 = chroma planes
 * in 4:4:4 mode). */
2419  int mb_type, int is_h264,
2420  int simple,
2421  int transform_bypass,
2422  int pixel_shift,
2423  int *block_offset,
2424  int linesize,
2425  uint8_t *dest_y, int p)
2426 {
2427  void (*idct_add)(uint8_t *dst, int16_t *block, int stride);
2428  void (*idct_dc_add)(uint8_t *dst, int16_t *block, int stride);
2429  int i;
2430  int qscale = p == 0 ? h->qscale : h->chroma_qp[p - 1];
2431  block_offset += 16 * p;
2432  if (IS_INTRA4x4(mb_type)) {
2433  if (IS_8x8DCT(mb_type)) {
2434  if (transform_bypass) {
2435  idct_dc_add =
2437  } else {
2438  idct_dc_add = h->h264dsp.h264_idct8_dc_add;
2440  }
/* 8x8 transform: four 8x8 blocks per 16x16 macroblock. */
2441  for (i = 0; i < 16; i += 4) {
2442  uint8_t *const ptr = dest_y + block_offset[i];
2443  const int dir = h->intra4x4_pred_mode_cache[scan8[i]];
/* profile_idc 244 = High 4:4:4 Predictive: prediction and residual
 * addition are fused for lossless vertical/horizontal modes. */
2444  if (transform_bypass && h->sps.profile_idc == 244 && dir <= 1) {
2445  h->hpc.pred8x8l_add[dir](ptr, h->mb + (i * 16 + p * 256 << pixel_shift), linesize);
2446  } else {
2447  const int nnz = h->non_zero_count_cache[scan8[i + p * 16]];
2448  h->hpc.pred8x8l[dir](ptr, (h->topleft_samples_available << i) & 0x8000,
2449  (h->topright_samples_available << i) & 0x4000, linesize);
2450  if (nnz) {
/* nnz == 1 with only the DC coefficient set uses the cheap DC-only add. */
2451  if (nnz == 1 && dctcoef_get(h->mb, pixel_shift, i * 16 + p * 256))
2452  idct_dc_add(ptr, h->mb + (i * 16 + p * 256 << pixel_shift), linesize);
2453  else
2454  idct_add(ptr, h->mb + (i * 16 + p * 256 << pixel_shift), linesize);
2455  }
2456  }
2457  }
2458  } else {
2459  if (transform_bypass) {
2460  idct_dc_add =
2462  } else {
2463  idct_dc_add = h->h264dsp.h264_idct_dc_add;
2465  }
2466  for (i = 0; i < 16; i++) {
2467  uint8_t *const ptr = dest_y + block_offset[i];
2468  const int dir = h->intra4x4_pred_mode_cache[scan8[i]];
2469 
2470  if (transform_bypass && h->sps.profile_idc == 244 && dir <= 1) {
2471  h->hpc.pred4x4_add[dir](ptr, h->mb + (i * 16 + p * 256 << pixel_shift), linesize);
2472  } else {
2473  uint8_t *topright;
2474  int nnz, tr;
2475  uint64_t tr_high;
/* These prediction modes need the top-right neighbor; when it is not
 * available, replicate the last available top pixel instead. */
2476  if (dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED) {
2477  const int topright_avail = (h->topright_samples_available << i) & 0x8000;
2478  assert(h->mb_y || linesize <= block_offset[i]);
2479  if (!topright_avail) {
2480  if (pixel_shift) {
2481  tr_high = ((uint16_t *)ptr)[3 - linesize / 2] * 0x0001000100010001ULL;
2482  topright = (uint8_t *)&tr_high;
2483  } else {
2484  tr = ptr[3 - linesize] * 0x01010101u;
2485  topright = (uint8_t *)&tr;
2486  }
2487  } else
2488  topright = ptr + (4 << pixel_shift) - linesize;
2489  } else
2490  topright = NULL;
2491 
2492  h->hpc.pred4x4[dir](ptr, topright, linesize);
2493  nnz = h->non_zero_count_cache[scan8[i + p * 16]];
2494  if (nnz) {
2495  if (is_h264) {
2496  if (nnz == 1 && dctcoef_get(h->mb, pixel_shift, i * 16 + p * 256))
2497  idct_dc_add(ptr, h->mb + (i * 16 + p * 256 << pixel_shift), linesize);
2498  else
2499  idct_add(ptr, h->mb + (i * 16 + p * 256 << pixel_shift), linesize);
2500  } else if (CONFIG_SVQ3_DECODER)
2501  ff_svq3_add_idct_c(ptr, h->mb + i * 16 + p * 256, linesize, qscale, 0);
2502  }
2503  }
2504  }
2505  }
2506  } else {
/* Intra 16x16: one prediction for the whole macroblock, then the DC
 * coefficients get their own dequant/transform pass. */
2507  h->hpc.pred16x16[h->intra16x16_pred_mode](dest_y, linesize);
2508  if (is_h264) {
2510  if (!transform_bypass)
2511  h->h264dsp.h264_luma_dc_dequant_idct(h->mb + (p * 256 << pixel_shift),
2512  h->mb_luma_dc[p],
2513  h->dequant4_coeff[p][qscale][0]);
2514  else {
/* Lossless: scatter the 16 DC values back to each block's slot 0. */
2515  static const uint8_t dc_mapping[16] = {
2516  0 * 16, 1 * 16, 4 * 16, 5 * 16,
2517  2 * 16, 3 * 16, 6 * 16, 7 * 16,
2518  8 * 16, 9 * 16, 12 * 16, 13 * 16,
2519  10 * 16, 11 * 16, 14 * 16, 15 * 16
2520  };
2521  for (i = 0; i < 16; i++)
2522  dctcoef_set(h->mb + (p * 256 << pixel_shift),
2523  pixel_shift, dc_mapping[i],
2524  dctcoef_get(h->mb_luma_dc[p],
2525  pixel_shift, i));
2526  }
2527  }
2528  } else if (CONFIG_SVQ3_DECODER)
2529  ff_svq3_luma_dc_dequant_idct_c(h->mb + p * 256,
2530  h->mb_luma_dc[p], qscale);
2531  }
2532 }
2533 
/* NOTE(review): the opening line of this definition (name and mb_type
 * parameter) plus a few interior lines (numbering gaps at 2550-2551 and
 * 2572-2573, where idct_add is selected) were elided by the documentation
 * extraction; confirm against the original file. From the body: adds the
 * inverse-transformed luma residual for non-intra-4x4 macroblocks of
 * plane p. */
2535  int is_h264, int simple,
2536  int transform_bypass,
2537  int pixel_shift,
2538  int *block_offset,
2539  int linesize,
2540  uint8_t *dest_y, int p)
2541 {
2542  void (*idct_add)(uint8_t *dst, int16_t *block, int stride);
2543  int i;
2544  block_offset += 16 * p;
/* Intra 4x4 residuals were already added during prediction. */
2545  if (!IS_INTRA4x4(mb_type)) {
2546  if (is_h264) {
2547  if (IS_INTRA16x16(mb_type)) {
2548  if (transform_bypass) {
2549  if (h->sps.profile_idc == 244 &&
2552  h->hpc.pred16x16_add[h->intra16x16_pred_mode](dest_y, block_offset,
2553  h->mb + (p * 256 << pixel_shift),
2554  linesize);
2555  } else {
2556  for (i = 0; i < 16; i++)
2557  if (h->non_zero_count_cache[scan8[i + p * 16]] ||
2558  dctcoef_get(h->mb, pixel_shift, i * 16 + p * 256))
2559  h->h264dsp.h264_add_pixels4_clear(dest_y + block_offset[i],
2560  h->mb + (i * 16 + p * 256 << pixel_shift),
2561  linesize);
2562  }
2563  } else {
2564  h->h264dsp.h264_idct_add16intra(dest_y, block_offset,
2565  h->mb + (p * 256 << pixel_shift),
2566  linesize,
2567  h->non_zero_count_cache + p * 5 * 8);
2568  }
/* Inter MB: only bother when the luma coded-block-pattern bits are set. */
2569  } else if (h->cbp & 15) {
2570  if (transform_bypass) {
/* Step 4 blocks at a time for the 8x8 transform, 1 otherwise. */
2571  const int di = IS_8x8DCT(mb_type) ? 4 : 1;
2574  for (i = 0; i < 16; i += di)
2575  if (h->non_zero_count_cache[scan8[i + p * 16]])
2576  idct_add(dest_y + block_offset[i],
2577  h->mb + (i * 16 + p * 256 << pixel_shift),
2578  linesize);
2579  } else {
2580  if (IS_8x8DCT(mb_type))
2581  h->h264dsp.h264_idct8_add4(dest_y, block_offset,
2582  h->mb + (p * 256 << pixel_shift),
2583  linesize,
2584  h->non_zero_count_cache + p * 5 * 8);
2585  else
2586  h->h264dsp.h264_idct_add16(dest_y, block_offset,
2587  h->mb + (p * 256 << pixel_shift),
2588  linesize,
2589  h->non_zero_count_cache + p * 5 * 8);
2590  }
2591  }
2592  } else if (CONFIG_SVQ3_DECODER) {
2593  for (i = 0; i < 16; i++)
2594  if (h->non_zero_count_cache[scan8[i + p * 16]] || h->mb[i * 16 + p * 256]) {
2595  // FIXME benchmark weird rule, & below
2596  uint8_t *const ptr = dest_y + block_offset[i];
2597  ff_svq3_add_idct_c(ptr, h->mb + i * 16 + p * 256, linesize,
2598  h->qscale, IS_INTRA(mb_type) ? 1 : 0);
2599  }
2600  }
2601  }
2602 }
2603 
2604 #define BITS 8
2605 #define SIMPLE 1
2606 #include "h264_mb_template.c"
2607 
2608 #undef BITS
2609 #define BITS 16
2610 #include "h264_mb_template.c"
2611 
2612 #undef SIMPLE
2613 #define SIMPLE 0
2614 #include "h264_mb_template.c"
2615 
/* NOTE(review): the signature line was elided by the documentation
 * extraction — presumably void ff_h264_hl_decode_mb(H264Context *h);
 * confirm against the original file. Dispatches one macroblock to the
 * appropriate template-generated decode routine based on chroma format,
 * bit depth (pixel_shift) and whether the fast "simple" path applies. */
2617 {
2618  const int mb_xy = h->mb_xy;
2619  const int mb_type = h->cur_pic.mb_type[mb_xy];
/* The simple path is skipped for size-optimized builds, PCM macroblocks
 * and lossless (qscale == 0) content. */
2620  int is_complex = CONFIG_SMALL || h->is_complex ||
2621  IS_INTRA_PCM(mb_type) || h->qscale == 0;
2622 
2623  if (CHROMA444(h)) {
2624  if (is_complex || h->pixel_shift)
2625  hl_decode_mb_444_complex(h);
2626  else
2627  hl_decode_mb_444_simple_8(h);
2628  } else if (is_complex) {
2629  hl_decode_mb_complex(h);
2630  } else if (h->pixel_shift) {
2631  hl_decode_mb_simple_16(h);
2632  } else
2633  hl_decode_mb_simple_8(h);
2634 }
2635 
/* NOTE(review): the signature line and a few interior lines (numbering
 * gaps at 2643/2645 — the log2 weight denominator reads — and 2691) were
 * elided by the documentation extraction; this is presumably the explicit
 * weighted-prediction table parser (pred_weight_table) — confirm against
 * the original file. Reads per-reference luma/chroma weights and offsets
 * from the slice header bitstream. */
2637 {
2638  int list, i;
2639  int luma_def, chroma_def;
2640 
2641  h->use_weight = 0;
2642  h->use_weight_chroma = 0;
2644  if (h->sps.chroma_format_idc)
/* Default weight is 1.0 in fixed point, i.e. 1 << log2_weight_denom. */
2646  luma_def = 1 << h->luma_log2_weight_denom;
2647  chroma_def = 1 << h->chroma_log2_weight_denom;
2648 
2649  for (list = 0; list < 2; list++) {
2650  h->luma_weight_flag[list] = 0;
2651  h->chroma_weight_flag[list] = 0;
2652  for (i = 0; i < h->ref_count[list]; i++) {
2653  int luma_weight_flag, chroma_weight_flag;
2654 
2655  luma_weight_flag = get_bits1(&h->gb);
2656  if (luma_weight_flag) {
2657  h->luma_weight[i][list][0] = get_se_golomb(&h->gb);
2658  h->luma_weight[i][list][1] = get_se_golomb(&h->gb);
/* Only flag "weighting in use" when the values differ from the
 * identity weight/zero offset. */
2659  if (h->luma_weight[i][list][0] != luma_def ||
2660  h->luma_weight[i][list][1] != 0) {
2661  h->use_weight = 1;
2662  h->luma_weight_flag[list] = 1;
2663  }
2664  } else {
2665  h->luma_weight[i][list][0] = luma_def;
2666  h->luma_weight[i][list][1] = 0;
2667  }
2668 
2669  if (h->sps.chroma_format_idc) {
2670  chroma_weight_flag = get_bits1(&h->gb);
2671  if (chroma_weight_flag) {
2672  int j;
2673  for (j = 0; j < 2; j++) {
2674  h->chroma_weight[i][list][j][0] = get_se_golomb(&h->gb);
2675  h->chroma_weight[i][list][j][1] = get_se_golomb(&h->gb);
2676  if (h->chroma_weight[i][list][j][0] != chroma_def ||
2677  h->chroma_weight[i][list][j][1] != 0) {
2678  h->use_weight_chroma = 1;
2679  h->chroma_weight_flag[list] = 1;
2680  }
2681  }
2682  } else {
2683  int j;
2684  for (j = 0; j < 2; j++) {
2685  h->chroma_weight[i][list][j][0] = chroma_def;
2686  h->chroma_weight[i][list][j][1] = 0;
2687  }
2688  }
2689  }
2690  }
/* NOTE(review): the condition guarding this break (numbering gap at
 * 2691, presumably the non-B-slice check that skips list 1) was elided
 * by the extraction. */
2692  break;
2693  }
2694  h->use_weight = h->use_weight || h->use_weight_chroma;
2695  return 0;
2696 }
2697 
2703 static void implicit_weight_table(H264Context *h, int field)
2704 {
2705  int ref0, ref1, i, cur_poc, ref_start, ref_count0, ref_count1;
2706 
2707  for (i = 0; i < 2; i++) {
2708  h->luma_weight_flag[i] = 0;
2709  h->chroma_weight_flag[i] = 0;
2710  }
2711 
2712  if (field < 0) {
2713  if (h->picture_structure == PICT_FRAME) {
2714  cur_poc = h->cur_pic_ptr->poc;
2715  } else {
2716  cur_poc = h->cur_pic_ptr->field_poc[h->picture_structure - 1];
2717  }
2718  if (h->ref_count[0] == 1 && h->ref_count[1] == 1 && !FRAME_MBAFF(h) &&
2719  h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2 * cur_poc) {
2720  h->use_weight = 0;
2721  h->use_weight_chroma = 0;
2722  return;
2723  }
2724  ref_start = 0;
2725  ref_count0 = h->ref_count[0];
2726  ref_count1 = h->ref_count[1];
2727  } else {
2728  cur_poc = h->cur_pic_ptr->field_poc[field];
2729  ref_start = 16;
2730  ref_count0 = 16 + 2 * h->ref_count[0];
2731  ref_count1 = 16 + 2 * h->ref_count[1];
2732  }
2733 
2734  h->use_weight = 2;
2735  h->use_weight_chroma = 2;
2736  h->luma_log2_weight_denom = 5;
2737  h->chroma_log2_weight_denom = 5;
2738 
2739  for (ref0 = ref_start; ref0 < ref_count0; ref0++) {
2740  int poc0 = h->ref_list[0][ref0].poc;
2741  for (ref1 = ref_start; ref1 < ref_count1; ref1++) {
2742  int w = 32;
2743  if (!h->ref_list[0][ref0].long_ref && !h->ref_list[1][ref1].long_ref) {
2744  int poc1 = h->ref_list[1][ref1].poc;
2745  int td = av_clip(poc1 - poc0, -128, 127);
2746  if (td) {
2747  int tb = av_clip(cur_poc - poc0, -128, 127);
2748  int tx = (16384 + (FFABS(td) >> 1)) / td;
2749  int dist_scale_factor = (tb * tx + 32) >> 8;
2750  if (dist_scale_factor >= -64 && dist_scale_factor <= 128)
2751  w = 64 - dist_scale_factor;
2752  }
2753  }
2754  if (field < 0) {
2755  h->implicit_weight[ref0][ref1][0] =
2756  h->implicit_weight[ref0][ref1][1] = w;
2757  } else {
2758  h->implicit_weight[ref0][ref1][field] = w;
2759  }
2760  }
2761  }
2762 }
2763 
/* Instantaneous decoder refresh: reset the POC/frame_num prediction
 * state so decoding restarts cleanly at an IDR picture.
 * NOTE(review): one interior line (numbering gap at 2769, presumably the
 * call dropping all reference pictures) was elided by the documentation
 * extraction; confirm against the original file. */
2767 static void idr(H264Context *h)
2768 {
2770  h->prev_frame_num = 0;
2771  h->prev_frame_num_offset = 0;
2772  h->prev_poc_msb =
2773  h->prev_poc_lsb = 0;
2774 }
2775 
2776 /* forget old pics after a seek */
2777 static void flush_change(H264Context *h)
2778 {
2779  int i;
2780  for (i = 0; i < MAX_DELAYED_PIC_COUNT; i++)
2781  h->last_pocs[i] = INT_MIN;
2782  h->outputed_poc = h->next_outputed_poc = INT_MIN;
2783  h->prev_interlaced_frame = 1;
2784  idr(h);
2785  if (h->cur_pic_ptr)
2786  h->cur_pic_ptr->reference = 0;
2787  h->first_field = 0;
2788  memset(h->ref_list[0], 0, sizeof(h->ref_list[0]));
2789  memset(h->ref_list[1], 0, sizeof(h->ref_list[1]));
2790  memset(h->default_ref_list[0], 0, sizeof(h->default_ref_list[0]));
2791  memset(h->default_ref_list[1], 0, sizeof(h->default_ref_list[1]));
2792  ff_h264_reset_sei(h);
2793  h->recovery_frame = -1;
2794  h->frame_recovered = 0;
2795 }
2796 
2797 /* forget old pics after a seek */
/* Full decoder flush callback: release every delayed and DPB picture
 * and drop the parser state, then tear down the per-context tables.
 * NOTE(review): two interior lines (numbering gaps at 2820 and 2822,
 * between the parse_context resets) were elided by the documentation
 * extraction; confirm against the original file. */
2798 static void flush_dpb(AVCodecContext *avctx)
2799 {
2800  H264Context *h = avctx->priv_data;
2801  int i;
2802 
2803  for (i = 0; i < MAX_DELAYED_PIC_COUNT; i++) {
2804  if (h->delayed_pic[i])
2805  h->delayed_pic[i]->reference = 0;
2806  h->delayed_pic[i] = NULL;
2807  }
2808 
2809  flush_change(h);
2810 
2811  if (h->DPB)
2812  for (i = 0; i < MAX_PICTURE_COUNT; i++)
2813  unref_picture(h, &h->DPB[i]);
2814  h->cur_pic_ptr = NULL;
2815  unref_picture(h, &h->cur_pic);
2816 
2817  h->mb_x = h->mb_y = 0;
2818 
2819  h->parse_context.state = -1;
2821  h->parse_context.overread = 0;
2823  h->parse_context.index = 0;
2824  h->parse_context.last_index = 0;
2825 
2826  free_tables(h, 1);
2827  h->context_initialized = 0;
2828 }
2829 
/* Compute the picture order count (POC) of the current picture from the
 * slice header fields, implementing the three poc_type modes of the
 * H.264 spec. Writes both field POCs to pic_field_poc[] and the frame
 * POC (min of the two) to *pic_poc; always returns 0.
 * NOTE(review): a few interior lines (numbering gaps at 2835, 2899 and
 * 2901 — presumably the frame_num_offset carry-over and the
 * picture-structure guards around the field POC stores) were elided by
 * the documentation extraction; confirm against the original file. */
2830 int ff_init_poc(H264Context *h, int pic_field_poc[2], int *pic_poc)
2831 {
2832  const int max_frame_num = 1 << h->sps.log2_max_frame_num;
2833  int field_poc[2];
2834 
/* frame_num wrapped around since the previous picture. */
2836  if (h->frame_num < h->prev_frame_num)
2837  h->frame_num_offset += max_frame_num;
2838 
2839  if (h->sps.poc_type == 0) {
2840  const int max_poc_lsb = 1 << h->sps.log2_max_poc_lsb;
2841 
/* Derive poc_msb from the lsb wrap direction (spec section 8.2.1.1). */
2842  if (h->poc_lsb < h->prev_poc_lsb &&
2843  h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb / 2)
2844  h->poc_msb = h->prev_poc_msb + max_poc_lsb;
2845  else if (h->poc_lsb > h->prev_poc_lsb &&
2846  h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb / 2)
2847  h->poc_msb = h->prev_poc_msb - max_poc_lsb;
2848  else
2849  h->poc_msb = h->prev_poc_msb;
2850  field_poc[0] =
2851  field_poc[1] = h->poc_msb + h->poc_lsb;
2852  if (h->picture_structure == PICT_FRAME)
2853  field_poc[1] += h->delta_poc_bottom;
2854  } else if (h->sps.poc_type == 1) {
2855  int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
2856  int i;
2857 
2858  if (h->sps.poc_cycle_length != 0)
2859  abs_frame_num = h->frame_num_offset + h->frame_num;
2860  else
2861  abs_frame_num = 0;
2862 
2863  if (h->nal_ref_idc == 0 && abs_frame_num > 0)
2864  abs_frame_num--;
2865 
2866  expected_delta_per_poc_cycle = 0;
2867  for (i = 0; i < h->sps.poc_cycle_length; i++)
2868  // FIXME integrate during sps parse
2869  expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[i];
2870 
2871  if (abs_frame_num > 0) {
2872  int poc_cycle_cnt = (abs_frame_num - 1) / h->sps.poc_cycle_length;
2873  int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
2874 
2875  expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
2876  for (i = 0; i <= frame_num_in_poc_cycle; i++)
2877  expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[i];
2878  } else
2879  expectedpoc = 0;
2880 
2881  if (h->nal_ref_idc == 0)
2882  expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
2883 
2884  field_poc[0] = expectedpoc + h->delta_poc[0];
2885  field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
2886 
2887  if (h->picture_structure == PICT_FRAME)
2888  field_poc[1] += h->delta_poc[1];
2889  } else {
/* poc_type 2: POC follows decoding order directly. */
2890  int poc = 2 * (h->frame_num_offset + h->frame_num);
2891 
2892  if (!h->nal_ref_idc)
2893  poc--;
2894 
2895  field_poc[0] = poc;
2896  field_poc[1] = poc;
2897  }
2898 
2900  pic_field_poc[0] = field_poc[0];
2902  pic_field_poc[1] = field_poc[1];
2903  *pic_poc = FFMIN(pic_field_poc[0], pic_field_poc[1]);
2904 
2905  return 0;
2906 }
2907 
/* NOTE(review): the signature line was elided by the documentation
 * extraction — presumably static void init_scan_tables(H264Context *h);
 * confirm against the original file. Builds the decoder's transposed
 * zigzag/field scan tables; the T() macros swap row/column bits of the
 * standard scan orders. Several lines assigning the 8x8 tables and the
 * transform-bypass (_q0) variants were also elided (numbering gaps at
 * 2923, 2925 and 2929-2934/2937-2941). */
2912 {
2913  int i;
2914  for (i = 0; i < 16; i++) {
2915 #define T(x) (x >> 2) | ((x << 2) & 0xF)
2916  h->zigzag_scan[i] = T(zigzag_scan[i]);
2917  h->field_scan[i] = T(field_scan[i]);
2918 #undef T
2919  }
2920  for (i = 0; i < 64; i++) {
2921 #define T(x) (x >> 3) | ((x & 7) << 3)
2922  h->zigzag_scan8x8[i] = T(ff_zigzag_direct[i]);
2924  h->field_scan8x8[i] = T(field_scan8x8[i]);
2926 #undef T
2927  }
2928  if (h->sps.transform_bypass) { // FIXME same ugly
2935  } else {
/* Without transform bypass the _q0 tables simply alias the normal ones. */
2936  h->zigzag_scan_q0 = h->zigzag_scan;
2939  h->field_scan_q0 = h->field_scan;
2942  }
2943 }
2944 
/* Finish decoding of one field/frame: report progress to waiting frame
 * threads, commit the POC/frame_num prediction state, close any hwaccel
 * frame and run error concealment.
 * NOTE(review): several interior lines (numbering gaps at 2952-2953,
 * 2957, 2961, 2963 and 2984 — presumably the thread progress report, the
 * MMCO execution, the frame_num_offset commit and the error-resilience
 * guard) were elided by the documentation extraction; confirm against
 * the original file. */
2945 static int field_end(H264Context *h, int in_setup)
2946 {
2947  AVCodecContext *const avctx = h->avctx;
2948  int err = 0;
2949  h->mb_y = 0;
2950 
2951  if (!in_setup && !h->droppable)
2954 
/* Commit prediction state only when not running ahead for frame
 * threading setup. */
2955  if (in_setup || !(avctx->active_thread_type & FF_THREAD_FRAME)) {
2956  if (!h->droppable) {
2958  h->prev_poc_msb = h->poc_msb;
2959  h->prev_poc_lsb = h->poc_lsb;
2960  }
2962  h->prev_frame_num = h->frame_num;
2964  }
2965 
2966  if (avctx->hwaccel) {
2967  if (avctx->hwaccel->end_frame(avctx) < 0)
2968  av_log(avctx, AV_LOG_ERROR,
2969  "hardware accelerator failed to decode picture\n");
2970  }
2971 
2972  /*
2973  * FIXME: Error handling code does not seem to support interlaced
2974  * when slices span multiple rows
2975  * The ff_er_add_slice calls don't work right for bottom
2976  * fields; they cause massive erroneous error concealing
2977  * Error marking covers both fields (top and bottom).
2978  * This causes a mismatched s->error_count
2979  * and a bad error table. Further, the error count goes to
2980  * INT_MAX when called for bottom field, because mb_y is
2981  * past end by one (callers fault) and resync_mb_y != 0
2982  * causes problems for the first MB line, too.
2983  */
2985  h->er.cur_pic = h->cur_pic_ptr;
2986  h->er.last_pic = h->ref_count[0] ? &h->ref_list[0][0] : NULL;
2987  h->er.next_pic = h->ref_count[1] ? &h->ref_list[1][0] : NULL;
2988  ff_er_frame_end(&h->er);
2989  }
2990  emms_c();
2991 
2992  h->current_slice = 0;
2993 
2994  return err;
2995 }
2996 
/* Replicate the relevant per-picture decoding state from src into dst so
 * a second slice-decoding context can work on the same picture; always
 * returns 0.
 * NOTE(review): one interior line (numbering gap at 3011, presumably the
 * prev_frame_num_offset copy) was elided by the documentation
 * extraction; confirm against the original file. */
3000 static int clone_slice(H264Context *dst, H264Context *src)
3001 {
3002  memcpy(dst->block_offset, src->block_offset, sizeof(dst->block_offset));
3003  dst->cur_pic_ptr = src->cur_pic_ptr;
3004  dst->cur_pic = src->cur_pic;
3005  dst->linesize = src->linesize;
3006  dst->uvlinesize = src->uvlinesize;
3007  dst->first_field = src->first_field;
3008 
3009  dst->prev_poc_msb = src->prev_poc_msb;
3010  dst->prev_poc_lsb = src->prev_poc_lsb;
3012  dst->prev_frame_num = src->prev_frame_num;
3013  dst->short_ref_count = src->short_ref_count;
3014 
3015  memcpy(dst->short_ref, src->short_ref, sizeof(dst->short_ref));
3016  memcpy(dst->long_ref, src->long_ref, sizeof(dst->long_ref));
3017  memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list));
3018 
3019  memcpy(dst->dequant4_coeff, src->dequant4_coeff, sizeof(src->dequant4_coeff));
3020  memcpy(dst->dequant8_coeff, src->dequant8_coeff, sizeof(src->dequant8_coeff));
3021 
3022  return 0;
3023 }
3024 
/* NOTE(review): the signature line was elided by the documentation
 * extraction — presumably int ff_h264_get_profile(SPS *sps); confirm
 * against the original file. Maps profile_idc plus the constraint-set
 * flags to an FF_PROFILE_H264_* value; the case labels themselves
 * (numbering gaps at 3037 and 3041-3043) were also elided. */
3033 {
3034  int profile = sps->profile_idc;
3035 
3036  switch (sps->profile_idc) {
3038  // constraint_set1_flag set to 1
3039  profile |= (sps->constraint_set_flags & 1 << 1) ? FF_PROFILE_H264_CONSTRAINED : 0;
3040  break;
3044  // constraint_set3_flag set to 1
3045  profile |= (sps->constraint_set_flags & 1 << 3) ? FF_PROFILE_H264_INTRA : 0;
3046  break;
3047  }
3048 
3049  return profile;
3050 }
3051 
/* NOTE(review): the signature line was elided by the documentation
 * extraction — presumably
 * static int h264_set_parameter_from_sps(H264Context *h); confirm
 * against the original file. Applies SPS-derived parameters (low delay,
 * bit depth, DSP function tables) to the decoder context. Returns 0 on
 * success or AVERROR_INVALIDDATA for unsupported bit depths. Several
 * interior lines (numbering gaps at 3055, 3068, 3070-3071, 3074-3080
 * and 3082 — the conditions and the dsp/predict init calls) were also
 * elided. */
3053 {
3054  if (h->flags & CODEC_FLAG_LOW_DELAY ||
3056  !h->sps.num_reorder_frames)) {
/* Low delay cannot be re-enabled once frames have been buffered. */
3057  if (h->avctx->has_b_frames > 1 || h->delayed_pic[0])
3058  av_log(h->avctx, AV_LOG_WARNING, "Delayed frames seen. "
3059  "Reenabling low delay requires a codec flush.\n");
3060  else
3061  h->low_delay = 1;
3062  }
3063 
3064  if (h->avctx->has_b_frames < 2)
3065  h->avctx->has_b_frames = !h->low_delay;
3066 
3067  if (h->avctx->bits_per_raw_sample != h->sps.bit_depth_luma ||
3069  if (h->sps.bit_depth_luma >= 8 && h->sps.bit_depth_luma <= 10) {
3072  h->pixel_shift = h->sps.bit_depth_luma > 8;
3073 
3075  h->sps.chroma_format_idc);
3079  h->sps.chroma_format_idc);
3081  ff_dsputil_init(&h->dsp, h->avctx);
3083  } else {
3084  av_log(h->avctx, AV_LOG_ERROR, "Unsupported bit depth %d\n",
3085  h->sps.bit_depth_luma);
3086  return AVERROR_INVALIDDATA;
3087  }
3088  }
3089  return 0;
3090 }
3091 
/* NOTE(review): the signature line was elided by the documentation
 * extraction — presumably
 * static enum AVPixelFormat get_pixel_format(H264Context *h); confirm
 * against the original file. Selects the output pixel format from the
 * SPS bit depth, chroma format, colorspace and color range, then lets
 * the user's get_format() callback pick from the candidate list. */
3093 {
3094  enum AVPixelFormat pix_fmts[2];
3095  const enum AVPixelFormat *choices = pix_fmts;
3096 
3097  pix_fmts[1] = AV_PIX_FMT_NONE;
3098 
3099  switch (h->sps.bit_depth_luma) {
3100  case 9:
3101  if (CHROMA444(h)) {
3102  if (h->avctx->colorspace == AVCOL_SPC_RGB) {
3103  pix_fmts[0] = AV_PIX_FMT_GBRP9;
3104  } else
3105  pix_fmts[0] = AV_PIX_FMT_YUV444P9;
3106  } else if (CHROMA422(h))
3107  pix_fmts[0] = AV_PIX_FMT_YUV422P9;
3108  else
3109  pix_fmts[0] = AV_PIX_FMT_YUV420P9;
3110  break;
3111  case 10:
3112  if (CHROMA444(h)) {
3113  if (h->avctx->colorspace == AVCOL_SPC_RGB) {
3114  pix_fmts[0] = AV_PIX_FMT_GBRP10;
3115  } else
3116  pix_fmts[0] = AV_PIX_FMT_YUV444P10;
3117  } else if (CHROMA422(h))
3118  pix_fmts[0] = AV_PIX_FMT_YUV422P10;
3119  else
3120  pix_fmts[0] = AV_PIX_FMT_YUV420P10;
3121  break;
3122  case 8:
3123  if (CHROMA444(h)) {
3124  if (h->avctx->colorspace == AVCOL_SPC_RGB)
3125  pix_fmts[0] = AV_PIX_FMT_GBRP;
3126  else if (h->avctx->color_range == AVCOL_RANGE_JPEG)
3127  pix_fmts[0] = AV_PIX_FMT_YUVJ444P;
3128  else
3129  pix_fmts[0] = AV_PIX_FMT_YUV444P;
3130  } else if (CHROMA422(h)) {
3131  if (h->avctx->color_range == AVCOL_RANGE_JPEG)
3132  pix_fmts[0] = AV_PIX_FMT_YUVJ422P;
3133  else
3134  pix_fmts[0] = AV_PIX_FMT_YUV422P;
3135  } else {
/* 8-bit 4:2:0: offer the hwaccel-capable candidate lists.
 * NOTE(review): the JPEG-range assignment (numbering gap at 3139) was
 * elided by the extraction. */
3136  if (h->avctx->codec->pix_fmts)
3137  choices = h->avctx->codec->pix_fmts;
3138  else if (h->avctx->color_range == AVCOL_RANGE_JPEG)
3140  else
3141  choices = h264_hwaccel_pixfmt_list_420;
3142  }
3143  break;
3144  default:
3146  "Unsupported bit depth %d\n", h->sps.bit_depth_luma);
3147  return AVERROR_INVALIDDATA;
3148  }
3149 
3150  return h->avctx->get_format(h->avctx, choices);
3151 }
3152 
3153 /* export coded and cropped frame dimensions to AVCodecContext */
/* NOTE(review): the signature line was elided by the documentation
 * extraction — presumably static int init_dimensions(H264Context *h);
 * confirm against the original file. Exports coded and cropped frame
 * dimensions to the AVCodecContext; returns 0 or AVERROR_INVALIDDATA. */
3155 {
3156  int width = h->width - (h->sps.crop_right + h->sps.crop_left);
3157  int height = h->height - (h->sps.crop_top + h->sps.crop_bottom);
3158 
3159  /* handle container cropping */
3160  if (!h->sps.crop &&
3161  FFALIGN(h->avctx->width, 16) == h->width &&
3162  FFALIGN(h->avctx->height, 16) == h->height) {
3163  width = h->avctx->width;
3164  height = h->avctx->height;
3165  }
3166 
3167  if (width <= 0 || height <= 0) {
3168  av_log(h->avctx, AV_LOG_ERROR, "Invalid cropped dimensions: %dx%d.\n",
3169  width, height);
/* NOTE(review): an elided line (numbering gap at 3170, presumably an
 * err_recognition/EXPLODE check) guards this return; without it the
 * code below it would be unreachable. Confirm against the original. */
3171  return AVERROR_INVALIDDATA;
3172 
3173  av_log(h->avctx, AV_LOG_WARNING, "Ignoring cropping information.\n");
3174  h->sps.crop_bottom = h->sps.crop_top = h->sps.crop_right = h->sps.crop_left = 0;
3175  h->sps.crop = 0;
3176 
3177  width = h->width;
3178  height = h->height;
3179  }
3180 
3181  h->avctx->coded_width = h->width;
3182  h->avctx->coded_height = h->height;
3183  h->avctx->width = width;
3184  h->avctx->height = height;
3185 
3186  return 0;
3187 }
3188 
/* One-time (or post-reinit) initialization driven by the first slice
 * header: aspect ratio, timebase, hwaccel, scan tables, per-slice thread
 * contexts. Returns 0 on success or a negative AVERROR code.
 * NOTE(review): several interior lines (numbering gaps at 3192,
 * 3197-3198, 3205, 3235, 3260-3261, 3264 and 3271 — parts of the
 * nb_slices condition, the chroma shift setup, the timebase call, the
 * slice-threading condition and a few field copies) were elided by the
 * documentation extraction; confirm against the original file. */
3189 static int h264_slice_header_init(H264Context *h, int reinit)
3190 {
3191  int nb_slices = (HAVE_THREADS &&
3193  h->avctx->thread_count : 1;
3194  int i, ret;
3195 
3196  h->avctx->sample_aspect_ratio = h->sps.sar;
3199  &h->chroma_x_shift, &h->chroma_y_shift);
3200 
3201  if (h->sps.timing_info_present_flag) {
3202  int64_t den = h->sps.time_scale;
/* Old x264 builds (< 44) wrote half the correct time_scale. */
3203  if (h->x264_build < 44U)
3204  den *= 2;
3206  h->sps.num_units_in_tick, den, 1 << 30);
3207  }
3208 
3209  h->avctx->hwaccel = ff_find_hwaccel(h->avctx);
3210 
3211  if (reinit)
3212  free_tables(h, 0);
3213  h->first_field = 0;
3214  h->prev_interlaced_frame = 1;
3215 
3216  init_scan_tables(h);
3217  ret = ff_h264_alloc_tables(h);
3218  if (ret < 0) {
3219  av_log(h->avctx, AV_LOG_ERROR, "Could not allocate memory\n");
3220  return ret;
3221  }
3222 
/* Clamp the slice-thread count to something the picture can support. */
3223  if (nb_slices > MAX_THREADS || (nb_slices > h->mb_height && h->mb_height)) {
3224  int max_slices;
3225  if (h->mb_height)
3226  max_slices = FFMIN(MAX_THREADS, h->mb_height);
3227  else
3228  max_slices = MAX_THREADS;
3229  av_log(h->avctx, AV_LOG_WARNING, "too many threads/slices %d,"
3230  " reducing to %d\n", nb_slices, max_slices);
3231  nb_slices = max_slices;
3232  }
3233  h->slice_context_count = nb_slices;
3234 
3236  ret = context_init(h);
3237  if (ret < 0) {
3238  av_log(h->avctx, AV_LOG_ERROR, "context_init() failed.\n");
3239  return ret;
3240  }
3241  } else {
/* Slice threading: clone the master context into per-thread copies. */
3242  for (i = 1; i < h->slice_context_count; i++) {
3243  H264Context *c;
3244  c = h->thread_context[i] = av_mallocz(sizeof(H264Context));
3245  if (!c)
3246  return AVERROR(ENOMEM);
3247  c->avctx = h->avctx;
3248  c->dsp = h->dsp;
3249  c->vdsp = h->vdsp;
3250  c->h264dsp = h->h264dsp;
3251  c->h264qpel = h->h264qpel;
3252  c->h264chroma = h->h264chroma;
3253  c->sps = h->sps;
3254  c->pps = h->pps;
3255  c->pixel_shift = h->pixel_shift;
3256  c->width = h->width;
3257  c->height = h->height;
3258  c->linesize = h->linesize;
3259  c->uvlinesize = h->uvlinesize;
3262  c->qscale = h->qscale;
3263  c->droppable = h->droppable;
3265  c->low_delay = h->low_delay;
3266  c->mb_width = h->mb_width;
3267  c->mb_height = h->mb_height;
3268  c->mb_stride = h->mb_stride;
3269  c->mb_num = h->mb_num;
3270  c->flags = h->flags;
3272  c->pict_type = h->pict_type;
3273 
3274  init_scan_tables(c);
3275  clone_tables(c, h, i);
3276  c->context_initialized = 1;
3277  }
3278 
3279  for (i = 0; i < h->slice_context_count; i++)
3280  if ((ret = context_init(h->thread_context[i])) < 0) {
3281  av_log(h->avctx, AV_LOG_ERROR, "context_init() failed.\n");
3282  return ret;
3283  }
3284  }
3285 
3286  h->context_initialized = 1;
3287 
3288  return 0;
3289 }
3290 
/*
 * ff_set_ref_count (name per the `ret = ff_set_ref_count(h)` call later in
 * this file): parse the num_ref_idx_active override from the slice header
 * and update h->ref_count[]/h->list_count.
 * NOTE(review): this is a doxygen rendering; the leading number on each line
 * is the original file's line number. The signature line (3291) and lines
 * 3301-3302 and 3316 of the original are missing here.
 * Returns 0 if the counts are unchanged, 1 if they changed (caller must then
 * rebuild the default reference lists), or a negative AVERROR on invalid data.
 */
3292 {
3293  int ref_count[2], list_count;
3294  int num_ref_idx_active_override_flag, max_refs;
3295 
3296  // set defaults, might be overridden a few lines later
3297  ref_count[0] = h->pps.ref_count[0];
3298  ref_count[1] = h->pps.ref_count[1];
3299 
3300  if (h->slice_type_nos != AV_PICTURE_TYPE_I) {
3303  num_ref_idx_active_override_flag = get_bits1(&h->gb);
3304 
      /* Slice header overrides the PPS-supplied active ref counts. */
3305  if (num_ref_idx_active_override_flag) {
3306  ref_count[0] = get_ue_golomb(&h->gb) + 1;
3307  if (ref_count[0] < 1)
3308  return AVERROR_INVALIDDATA;
      /* List 1 count is only coded for B slices. */
3309  if (h->slice_type_nos == AV_PICTURE_TYPE_B) {
3310  ref_count[1] = get_ue_golomb(&h->gb) + 1;
3311  if (ref_count[1] < 1)
3312  return AVERROR_INVALIDDATA;
3313  }
3314  }
3315 
      /* NOTE(review): line 3316 (the condition selecting two lists —
       * presumably a B-slice check) is missing from this rendering. */
3317  list_count = 2;
3318  else
3319  list_count = 1;
3320  } else {
      /* I slices use no reference lists at all. */
3321  list_count = 0;
3322  ref_count[0] = ref_count[1] = 0;
3323  }
3324 
      /* Field pictures may address twice as many references as frames. */
3325  max_refs = h->picture_structure == PICT_FRAME ? 16 : 32;
3326 
3327  if (ref_count[0] > max_refs || ref_count[1] > max_refs) {
3328  av_log(h->avctx, AV_LOG_ERROR, "reference overflow\n");
3329  h->ref_count[0] = h->ref_count[1] = 0;
3330  return AVERROR_INVALIDDATA;
3331  }
3332 
      /* Signal a change to the caller so default ref lists get rebuilt. */
3333  if (list_count != h->list_count ||
3334  ref_count[0] != h->ref_count[0] ||
3335  ref_count[1] != h->ref_count[1]) {
3336  h->ref_count[0] = ref_count[0];
3337  h->ref_count[1] = ref_count[1];
3338  h->list_count = list_count;
3339  return 1;
3340  }
3341 
3342  return 0;
3343 }
3344 
/*
 * Slice-header decoding (decode_slice_header in the original source; the
 * signature line is missing from this rendering).
 * Parses the slice header from h->gb, activates the referenced PPS/SPS,
 * (re)initializes the decoder context on dimension/format changes, handles
 * field pairing and frame_num gaps, reads POC/ref-count/weighting/deblocking
 * parameters, and builds the per-slice ref2frm mapping.
 * NOTE(review): leading numbers are original file line numbers; several
 * original lines (conditions, call arguments) are missing from this listing
 * and are flagged below where relevant.
 */
3356 {
3357  unsigned int first_mb_in_slice;
3358  unsigned int pps_id;
3359  int ret;
3360  unsigned int slice_type, tmp, i, j;
3361  int default_ref_list_done = 0;
3362  int last_pic_structure, last_pic_droppable;
3363  int needs_reinit = 0;
3364  int field_pic_flag, bottom_field_flag;
3365 
3368 
3369  first_mb_in_slice = get_ue_golomb(&h->gb);
3370 
      /* first_mb_in_slice == 0 marks the start of a new picture (or field);
       * close the previous field and reset per-picture slice state. */
3371  if (first_mb_in_slice == 0) { // FIXME better field boundary detection
3372  if (h0->current_slice && h->cur_pic_ptr && FIELD_PICTURE(h)) {
3373  field_end(h, 1);
3374  }
3375 
3376  h0->current_slice = 0;
3377  if (!h0->first_field) {
3378  if (h->cur_pic_ptr && !h->droppable) {
      /* NOTE(review): lines 3379-3380 (the body of this branch, presumably a
       * progress report on the previous picture) are missing here. */
3381  }
3382  h->cur_pic_ptr = NULL;
3383  }
3384  }
3385 
      /* slice_type: values 5..9 are the "fixed" variants (same type for the
       * whole picture); fold them into 0..4 and remember the fixed flag. */
3386  slice_type = get_ue_golomb_31(&h->gb);
3387  if (slice_type > 9) {
3389  "slice type %d too large at %d %d\n",
3390  h->slice_type, h->mb_x, h->mb_y);
3391  return AVERROR_INVALIDDATA;
3392  }
3393  if (slice_type > 4) {
3394  slice_type -= 5;
3395  h->slice_type_fixed = 1;
3396  } else
3397  h->slice_type_fixed = 0;
3398 
3399  slice_type = golomb_to_pict_type[slice_type];
3400  if (slice_type == AV_PICTURE_TYPE_I ||
3401  (h0->current_slice != 0 && slice_type == h0->last_slice_type)) {
3402  default_ref_list_done = 1;
3403  }
3404  h->slice_type = slice_type;
3405  h->slice_type_nos = slice_type & 3;
3406 
3407  if (h->nal_unit_type == NAL_IDR_SLICE &&
3409  av_log(h->avctx, AV_LOG_ERROR, "A non-intra slice in an IDR NAL unit.\n");
3410  return AVERROR_INVALIDDATA;
3411  }
3412 
3413  // to make a few old functions happy, it's wrong though
3414  h->pict_type = h->slice_type;
3415 
      /* Activate the PPS referenced by this slice, then the SPS the PPS
       * refers to; a new/changed SPS may force a context reinit. */
3416  pps_id = get_ue_golomb(&h->gb);
3417  if (pps_id >= MAX_PPS_COUNT) {
3418  av_log(h->avctx, AV_LOG_ERROR, "pps_id %u out of range\n", pps_id);
3419  return AVERROR_INVALIDDATA;
3420  }
3421  if (!h0->pps_buffers[pps_id]) {
3423  "non-existing PPS %u referenced\n",
3424  pps_id);
3425  return AVERROR_INVALIDDATA;
3426  }
3427  h->pps = *h0->pps_buffers[pps_id];
3428 
3429  if (!h0->sps_buffers[h->pps.sps_id]) {
3431  "non-existing SPS %u referenced\n",
3432  h->pps.sps_id);
3433  return AVERROR_INVALIDDATA;
3434  }
3435 
3436  if (h->pps.sps_id != h->sps.sps_id ||
3437  h0->sps_buffers[h->pps.sps_id]->new) {
3438  h0->sps_buffers[h->pps.sps_id]->new = 0;
3439 
3440  h->sps = *h0->sps_buffers[h->pps.sps_id];
3441 
      /* NOTE(review): lines 3443-3445 (further bit-depth/chroma-format
       * comparison terms of this condition) are missing here. */
3442  if (h->bit_depth_luma != h->sps.bit_depth_luma ||
3446  needs_reinit = 1;
3447  }
3448  if ((ret = h264_set_parameter_from_sps(h)) < 0)
3449  return ret;
3450  }
3451 
3452  h->avctx->profile = ff_h264_get_profile(&h->sps);
3453  h->avctx->level = h->sps.level_idc;
3454  h->avctx->refs = h->sps.ref_frame_count;
3455 
      /* Derive macroblock-grid geometry from the SPS; interlaced streams
       * (frame_mbs_only_flag == 0) double the mb height. */
3456  if (h->mb_width != h->sps.mb_width ||
3457  h->mb_height != h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag))
3458  needs_reinit = 1;
3459 
3460  h->mb_width = h->sps.mb_width;
3461  h->mb_height = h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
3462  h->mb_num = h->mb_width * h->mb_height;
3463  h->mb_stride = h->mb_width + 1;
3464 
3465  h->b_stride = h->mb_width * 4;
3466 
3467  h->chroma_y_shift = h->sps.chroma_format_idc <= 1; // 400 uses yuv420p
3468 
3469  h->width = 16 * h->mb_width;
3470  h->height = 16 * h->mb_height;
3471 
3472  ret = init_dimensions(h);
3473  if (ret < 0)
3474  return ret;
3475 
      /* Propagate VUI colorimetry to the AVCodecContext.
       * NOTE(review): lines 3476-3477, 3479 and 3482 (the enclosing
       * conditions/assignments of this section) are missing here. */
3478  : AVCOL_RANGE_MPEG;
3480  if (h->avctx->colorspace != h->sps.colorspace)
3481  needs_reinit = 1;
3483  h->avctx->color_trc = h->sps.color_trc;
3484  h->avctx->colorspace = h->sps.colorspace;
3485  }
3486  }
3487 
      /* Dimension/format change: full reinit is only legal on the first
       * slice context (h == h0); otherwise it is a bitstream error. */
3488  if (h->context_initialized &&
3489  (h->width != h->avctx->coded_width ||
3490  h->height != h->avctx->coded_height ||
3491  needs_reinit)) {
3492  h->context_initialized = 0;
3493  if (h != h0) {
3494  av_log(h->avctx, AV_LOG_ERROR, "changing width/height on "
3495  "slice %d\n", h0->current_slice + 1);
3496  return AVERROR_INVALIDDATA;
3497  }
3498 
3499  flush_change(h);
3500 
3501  if ((ret = get_pixel_format(h)) < 0)
3502  return ret;
3503  h->avctx->pix_fmt = ret;
3504 
3505  av_log(h->avctx, AV_LOG_INFO, "Reinit context to %dx%d, "
3506  "pix_fmt: %d\n", h->width, h->height, h->avctx->pix_fmt);
3507 
3508  if ((ret = h264_slice_header_init(h, 1)) < 0) {
3510  "h264_slice_header_init() failed\n");
3511  return ret;
3512  }
3513  }
3514  if (!h->context_initialized) {
3515  if (h != h0) {
3517  "Cannot (re-)initialize context during parallel decoding.\n");
3518  return AVERROR_PATCHWELCOME;
3519  }
3520 
3521  if ((ret = get_pixel_format(h)) < 0)
3522  return ret;
3523  h->avctx->pix_fmt = ret;
3524 
3525  if ((ret = h264_slice_header_init(h, 0)) < 0) {
3527  "h264_slice_header_init() failed\n");
3528  return ret;
3529  }
3530  }
3531 
      /* Dequant tables depend on the PPS; rebuild on PPS change.
       * NOTE(review): line 3534 (the rebuild call) is missing here. */
3532  if (h == h0 && h->dequant_coeff_pps != pps_id) {
3533  h->dequant_coeff_pps = pps_id;
3535  }
3536 
3537  h->frame_num = get_bits(&h->gb, h->sps.log2_max_frame_num);
3538 
      /* Determine picture structure (frame / top field / bottom field) and
       * MBAFF mode from the slice header. */
3539  h->mb_mbaff = 0;
3540  h->mb_aff_frame = 0;
3541  last_pic_structure = h0->picture_structure;
3542  last_pic_droppable = h0->droppable;
3543  h->droppable = h->nal_ref_idc == 0;
3544  if (h->sps.frame_mbs_only_flag) {
3546  } else {
3547  field_pic_flag = get_bits1(&h->gb);
3548  if (field_pic_flag) {
3549  bottom_field_flag = get_bits1(&h->gb);
3550  h->picture_structure = PICT_TOP_FIELD + bottom_field_flag;
3551  } else {
3553  h->mb_aff_frame = h->sps.mb_aff;
3554  }
3555  }
3557 
      /* Subsequent slices of the same picture must not change field mode or
       * droppability. */
3558  if (h0->current_slice != 0) {
3559  if (last_pic_structure != h->picture_structure ||
3560  last_pic_droppable != h->droppable) {
3562  "Changing field mode (%d -> %d) between slices is not allowed\n",
3563  last_pic_structure, h->picture_structure);
3564  h->picture_structure = last_pic_structure;
3565  h->droppable = last_pic_droppable;
3566  return AVERROR_INVALIDDATA;
3567  } else if (!h0->cur_pic_ptr) {
3569  "unset cur_pic_ptr on slice %d\n",
3570  h0->current_slice + 1);
3571  return AVERROR_INVALIDDATA;
3572  }
3573  } else {
3574  /* Shorten frame num gaps so we don't have to allocate reference
3575  * frames just to throw them away */
3576  if (h->frame_num != h->prev_frame_num) {
3577  int unwrap_prev_frame_num = h->prev_frame_num;
3578  int max_frame_num = 1 << h->sps.log2_max_frame_num;
3579 
3580  if (unwrap_prev_frame_num > h->frame_num)
3581  unwrap_prev_frame_num -= max_frame_num;
3582 
3583  if ((h->frame_num - unwrap_prev_frame_num) > h->sps.ref_frame_count) {
3584  unwrap_prev_frame_num = (h->frame_num - h->sps.ref_frame_count) - 1;
3585  if (unwrap_prev_frame_num < 0)
3586  unwrap_prev_frame_num += max_frame_num;
3587 
3588  h->prev_frame_num = unwrap_prev_frame_num;
3589  }
3590  }
3591 
3592  /* See if we have a decoded first field looking for a pair...
3593  * Here, we're using that to see if we should mark previously
3594  * decode frames as "finished".
3595  * We have to do that before the "dummy" in-between frame allocation,
3596  * since that can modify s->current_picture_ptr. */
3597  if (h0->first_field) {
3598  assert(h0->cur_pic_ptr);
3599  assert(h0->cur_pic_ptr->f.buf[0]);
3600  assert(h0->cur_pic_ptr->reference != DELAYED_PIC_REF);
3601 
3602  /* figure out if we have a complementary field pair */
3603  if (!FIELD_PICTURE(h) || h->picture_structure == last_pic_structure) {
3604  /* Previous field is unmatched. Don't display it, but let it
3605  * remain for reference if marked as such. */
3606  if (!last_pic_droppable && last_pic_structure != PICT_FRAME) {
3607  ff_thread_report_progress(&h0->cur_pic_ptr->tf, INT_MAX,
3608  last_pic_structure == PICT_TOP_FIELD);
3609  }
3610  } else {
3611  if (h0->cur_pic_ptr->frame_num != h->frame_num) {
3612  /* This and previous field were reference, but had
3613  * different frame_nums. Consider this field first in
3614  * pair. Throw away previous field except for reference
3615  * purposes. */
3616  if (!last_pic_droppable && last_pic_structure != PICT_FRAME) {
3617  ff_thread_report_progress(&h0->cur_pic_ptr->tf, INT_MAX,
3618  last_pic_structure == PICT_TOP_FIELD);
3619  }
3620  } else {
3621  /* Second field in complementary pair */
      /* NOTE(review): lines 3623 and 3625-3626 (the remaining terms of this
       * top/bottom pairing check and the error log call) are missing here. */
3622  if (!((last_pic_structure == PICT_TOP_FIELD &&
3624  (last_pic_structure == PICT_BOTTOM_FIELD &&
3627  "Invalid field mode combination %d/%d\n",
3628  last_pic_structure, h->picture_structure);
3629  h->picture_structure = last_pic_structure;
3630  h->droppable = last_pic_droppable;
3631  return AVERROR_INVALIDDATA;
3632  } else if (last_pic_droppable != h->droppable) {
3634  "Found reference and non-reference fields in the same frame, which");
3635  h->picture_structure = last_pic_structure;
3636  h->droppable = last_pic_droppable;
3637  return AVERROR_PATCHWELCOME;
3638  }
3639  }
3640  }
3641  }
3642 
      /* Conceal frame_num gaps by decoding dummy frames until the expected
       * frame_num is reached. */
3643  while (h->frame_num != h->prev_frame_num &&
3644  h->frame_num != (h->prev_frame_num + 1) % (1 << h->sps.log2_max_frame_num)) {
3645  Picture *prev = h->short_ref_count ? h->short_ref[0] : NULL;
3646  av_log(h->avctx, AV_LOG_DEBUG, "Frame num gap %d %d\n",
3647  h->frame_num, h->prev_frame_num);
3648  ret = h264_frame_start(h);
3649  if (ret < 0) {
3650  h0->first_field = 0;
3651  return ret;
3652  }
3653 
3654  h->prev_frame_num++;
3655  h->prev_frame_num %= 1 << h->sps.log2_max_frame_num;
      /* NOTE(review): lines 3656, 3659 and 3662 (an assignment and the two
       * calls whose results feed the checks below) are missing here. */
3657  ff_thread_report_progress(&h->cur_pic_ptr->tf, INT_MAX, 0);
3658  ff_thread_report_progress(&h->cur_pic_ptr->tf, INT_MAX, 1);
3660  if (ret < 0 && (h->avctx->err_recognition & AV_EF_EXPLODE))
3661  return ret;
3663  if (ret < 0 && (h->avctx->err_recognition & AV_EF_EXPLODE))
3664  return ret;
3665  /* Error concealment: If a ref is missing, copy the previous ref
3666  * in its place.
3667  * FIXME: Avoiding a memcpy would be nice, but ref handling makes
3668  * many assumptions about there being no actual duplicates.
3669  * FIXME: This does not copy padding for out-of-frame motion
3670  * vectors. Given we are concealing a lost frame, this probably
3671  * is not noticeable by comparison, but it should be fixed. */
3672  if (h->short_ref_count) {
3673  if (prev) {
3674  av_image_copy(h->short_ref[0]->f.data,
3675  h->short_ref[0]->f.linesize,
3676  (const uint8_t **)prev->f.data,
3677  prev->f.linesize,
3678  h->avctx->pix_fmt,
3679  h->mb_width * 16,
3680  h->mb_height * 16);
3681  h->short_ref[0]->poc = prev->poc + 2;
3682  }
3683  h->short_ref[0]->frame_num = h->prev_frame_num;
3684  }
3685  }
3686 
3687  /* See if we have a decoded first field looking for a pair...
3688  * We're using that to see whether to continue decoding in that
3689  * frame, or to allocate a new one. */
3690  if (h0->first_field) {
3691  assert(h0->cur_pic_ptr);
3692  assert(h0->cur_pic_ptr->f.buf[0]);
3693  assert(h0->cur_pic_ptr->reference != DELAYED_PIC_REF);
3694 
3695  /* figure out if we have a complementary field pair */
3696  if (!FIELD_PICTURE(h) || h->picture_structure == last_pic_structure) {
3697  /* Previous field is unmatched. Don't display it, but let it
3698  * remain for reference if marked as such. */
3699  h0->cur_pic_ptr = NULL;
3700  h0->first_field = FIELD_PICTURE(h);
3701  } else {
3702  if (h0->cur_pic_ptr->frame_num != h->frame_num) {
3703  /* This and the previous field had different frame_nums.
3704  * Consider this field first in pair. Throw away previous
3705  * one except for reference purposes. */
3706  h0->first_field = 1;
3707  h0->cur_pic_ptr = NULL;
3708  } else {
3709  /* Second field in complementary pair */
3710  h0->first_field = 0;
3711  }
3712  }
3713  } else {
3714  /* Frame or first field in a potentially complementary pair */
3715  h0->first_field = FIELD_PICTURE(h);
3716  }
3717 
3718  if (!FIELD_PICTURE(h) || h0->first_field) {
3719  if (h264_frame_start(h) < 0) {
3720  h0->first_field = 0;
3721  return AVERROR_INVALIDDATA;
3722  }
3723  } else {
      /* NOTE(review): line 3724 (the second-field branch body) is missing. */
3725  }
3726  }
3727  if (h != h0 && (ret = clone_slice(h, h0)) < 0)
3728  return ret;
3729 
3730  h->cur_pic_ptr->frame_num = h->frame_num; // FIXME frame_num cleanup
3731 
      /* Validate and convert first_mb_in_slice into resync mb coordinates. */
3732  assert(h->mb_num == h->mb_width * h->mb_height);
3733  if (first_mb_in_slice << FIELD_OR_MBAFF_PICTURE(h) >= h->mb_num ||
3734  first_mb_in_slice >= h->mb_num) {
3735  av_log(h->avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n");
3736  return AVERROR_INVALIDDATA;
3737  }
3738  h->resync_mb_x = h->mb_x = first_mb_in_slice % h->mb_width;
      /* NOTE(review): lines 3740-3741 (the shift amount and the bottom-field
       * condition for the +1 adjustment below) are missing here. */
3739  h->resync_mb_y = h->mb_y = (first_mb_in_slice / h->mb_width) <<
3742  h->resync_mb_y = h->mb_y = h->mb_y + 1;
3743  assert(h->mb_y < h->mb_height);
3744 
      /* curr_pic_num/max_pic_num use field numbering for field pictures. */
3745  if (h->picture_structure == PICT_FRAME) {
3746  h->curr_pic_num = h->frame_num;
3747  h->max_pic_num = 1 << h->sps.log2_max_frame_num;
3748  } else {
3749  h->curr_pic_num = 2 * h->frame_num + 1;
3750  h->max_pic_num = 1 << (h->sps.log2_max_frame_num + 1);
3751  }
3752 
3753  if (h->nal_unit_type == NAL_IDR_SLICE)
3754  get_ue_golomb(&h->gb); /* idr_pic_id */
3755 
      /* Picture order count syntax (poc_type 0 and 1 variants). */
3756  if (h->sps.poc_type == 0) {
3757  h->poc_lsb = get_bits(&h->gb, h->sps.log2_max_poc_lsb);
3758 
3759  if (h->pps.pic_order_present == 1 && h->picture_structure == PICT_FRAME)
3760  h->delta_poc_bottom = get_se_golomb(&h->gb);
3761  }
3762 
3763  if (h->sps.poc_type == 1 && !h->sps.delta_pic_order_always_zero_flag) {
3764  h->delta_poc[0] = get_se_golomb(&h->gb);
3765 
3766  if (h->pps.pic_order_present == 1 && h->picture_structure == PICT_FRAME)
3767  h->delta_poc[1] = get_se_golomb(&h->gb);
3768  }
3769 
      /* NOTE(review): lines 3770 and 3772-3773 (POC init and the
       * redundant_pic_cnt read) are missing here. */
3771 
3774 
      /* Active reference counts; a return of 1 means they changed and the
       * default reference lists must be rebuilt. */
3775  ret = ff_set_ref_count(h);
3776  if (ret < 0)
3777  return ret;
3778  else if (ret == 1)
3779  default_ref_list_done = 0;
3780 
      /* NOTE(review): lines 3782 and 3785 (the default-ref-list build call
       * and the reordering call whose result is checked below) are missing. */
3781  if (!default_ref_list_done)
3783 
3784  if (h->slice_type_nos != AV_PICTURE_TYPE_I) {
3786  if (ret < 0) {
3787  h->ref_count[1] = h->ref_count[0] = 0;
3788  return ret;
3789  }
3790  }
3791 
      /* Weighted prediction: explicit tables for P (weighted_pred) and for
       * B with weighted_bipred_idc == 1; implicit weights for idc == 2. */
3792  if ((h->pps.weighted_pred && h->slice_type_nos == AV_PICTURE_TYPE_P) ||
3793  (h->pps.weighted_bipred_idc == 1 &&
3796  else if (h->pps.weighted_bipred_idc == 2 &&
3798  implicit_weight_table(h, -1);
3799  } else {
3800  h->use_weight = 0;
3801  for (i = 0; i < 2; i++) {
3802  h->luma_weight_flag[i] = 0;
3803  h->chroma_weight_flag[i] = 0;
3804  }
3805  }
3806 
3807  // If frame-mt is enabled, only update mmco tables for the first slice
3808  // in a field. Subsequent slices can temporarily clobber h->mmco_index
3809  // or h->mmco, which will cause ref list mix-ups and decoding errors
3810  // further down the line. This may break decoding if the first slice is
3811  // corrupt, thus we only do this if frame-mt is enabled.
3812  if (h->nal_ref_idc) {
3813  ret = ff_h264_decode_ref_pic_marking(h0, &h->gb,
3815  h0->current_slice == 0);
3816  if (ret < 0 && (h->avctx->err_recognition & AV_EF_EXPLODE))
3817  return AVERROR_INVALIDDATA;
3818  }
3819 
      /* NOTE(review): lines 3821 and 3823 (MBAFF ref-list setup call and the
       * implicit-weight condition) are missing here. */
3820  if (FRAME_MBAFF(h)) {
3822 
3824  implicit_weight_table(h, 0);
3825  implicit_weight_table(h, 1);
3826  }
3827  }
3828 
3832 
3833  if (h->slice_type_nos != AV_PICTURE_TYPE_I && h->pps.cabac) {
3834  tmp = get_ue_golomb_31(&h->gb);
3835  if (tmp > 2) {
3836  av_log(h->avctx, AV_LOG_ERROR, "cabac_init_idc %u overflow\n", tmp);
3837  return AVERROR_INVALIDDATA;
3838  }
3839  h->cabac_init_idc = tmp;
3840  }
3841 
      /* slice_qp_delta; valid QP range scales with bit depth. */
3842  h->last_qscale_diff = 0;
3843  tmp = h->pps.init_qp + get_se_golomb(&h->gb);
3844  if (tmp > 51 + 6 * (h->sps.bit_depth_luma - 8)) {
3845  av_log(h->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp);
3846  return AVERROR_INVALIDDATA;
3847  }
3848  h->qscale = tmp;
3849  h->chroma_qp[0] = get_chroma_qp(h, 0, h->qscale);
3850  h->chroma_qp[1] = get_chroma_qp(h, 1, h->qscale);
3851  // FIXME qscale / qp ... stuff
3852  if (h->slice_type == AV_PICTURE_TYPE_SP)
3853  get_bits1(&h->gb); /* sp_for_switch_flag */
3854  if (h->slice_type == AV_PICTURE_TYPE_SP ||
3856  get_se_golomb(&h->gb); /* slice_qs_delta */
3857 
      /* Deblocking filter control; deblocking_filter_idc 0/1 are swapped
       * relative to the internal convention, hence the ^= 1 below.
       * NOTE(review): line 3861 (the PPS deblocking_filter_control_present
       * condition) is missing here. */
3858  h->deblocking_filter = 1;
3859  h->slice_alpha_c0_offset = 0;
3860  h->slice_beta_offset = 0;
3862  tmp = get_ue_golomb_31(&h->gb);
3863  if (tmp > 2) {
3865  "deblocking_filter_idc %u out of range\n", tmp);
3866  return AVERROR_INVALIDDATA;
3867  }
3868  h->deblocking_filter = tmp;
3869  if (h->deblocking_filter < 2)
3870  h->deblocking_filter ^= 1; // 1<->0
3871 
3872  if (h->deblocking_filter) {
3873  h->slice_alpha_c0_offset = get_se_golomb(&h->gb) * 2;
3874  h->slice_beta_offset = get_se_golomb(&h->gb) * 2;
3875  if (h->slice_alpha_c0_offset > 12 ||
3876  h->slice_alpha_c0_offset < -12 ||
3877  h->slice_beta_offset > 12 ||
3878  h->slice_beta_offset < -12) {
3880  "deblocking filter parameters %d %d out of range\n",
3882  return AVERROR_INVALIDDATA;
3883  }
3884  }
3885  }
3886 
      /* Honour the user's skip_loop_filter setting.
       * NOTE(review): lines 3888-3892 (the remaining AVDISCARD_* terms of
       * this condition) are missing here. */
3887  if (h->avctx->skip_loop_filter >= AVDISCARD_ALL ||
3893  h->nal_ref_idc == 0))
3894  h->deblocking_filter = 0;
3895 
      /* Cross-slice deblocking (type 1) cannot be parallelized; either cheat
       * (CODEC_FLAG2_FAST) or fall back to a single slice context. */
3896  if (h->deblocking_filter == 1 && h0->max_contexts > 1) {
3897  if (h->avctx->flags2 & CODEC_FLAG2_FAST) {
3898  /* Cheat slightly for speed:
3899  * Do not bother to deblock across slices. */
3900  h->deblocking_filter = 2;
3901  } else {
3902  h0->max_contexts = 1;
3903  if (!h0->single_decode_warning) {
3904  av_log(h->avctx, AV_LOG_INFO,
3905  "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n");
3906  h0->single_decode_warning = 1;
3907  }
3908  if (h != h0) {
3910  "Deblocking switched inside frame.\n");
3911  return 1;
3912  }
3913  }
3914  }
      /* qp_thresh: below this QP the loop filter provably does nothing.
       * NOTE(review): lines 3916 and 3918 (two terms of this expression) are
       * missing here. */
3915  h->qp_thresh = 15 -
3917  FFMAX3(0,
3919  h->pps.chroma_qp_index_offset[1]) +
3920  6 * (h->sps.bit_depth_luma - 8);
3921 
3922  h0->last_slice_type = slice_type;
3923  h->slice_num = ++h0->current_slice;
3924  if (h->slice_num >= MAX_SLICES) {
3926  "Too many slices, increase MAX_SLICES and recompile\n");
3927  }
3928 
      /* Build the per-slice ref2frm table: maps each reference-list index to
       * a small frame id (short refs first, then long refs; 60 = unused). */
3929  for (j = 0; j < 2; j++) {
3930  int id_list[16];
3931  int *ref2frm = h->ref2frm[h->slice_num & (MAX_SLICES - 1)][j];
3932  for (i = 0; i < 16; i++) {
3933  id_list[i] = 60;
3934  if (j < h->list_count && i < h->ref_count[j] &&
3935  h->ref_list[j][i].f.buf[0]) {
3936  int k;
3937  AVBuffer *buf = h->ref_list[j][i].f.buf[0]->buffer;
3938  for (k = 0; k < h->short_ref_count; k++)
3939  if (h->short_ref[k]->f.buf[0]->buffer == buf) {
3940  id_list[i] = k;
3941  break;
3942  }
3943  for (k = 0; k < h->long_ref_count; k++)
3944  if (h->long_ref[k] && h->long_ref[k]->f.buf[0]->buffer == buf) {
3945  id_list[i] = h->short_ref_count + k;
3946  break;
3947  }
3948  }
3949  }
3950 
3951  ref2frm[0] =
3952  ref2frm[1] = -1;
3953  for (i = 0; i < 16; i++)
3954  ref2frm[i + 2] = 4 * id_list[i] + (h->ref_list[j][i].reference & 3);
3955  ref2frm[18 + 0] =
3956  ref2frm[18 + 1] = -1;
3957  for (i = 16; i < 48; i++)
3958  ref2frm[i + 4] = 4 * id_list[(i - 16) >> 1] +
3959  (h->ref_list[j][i].reference & 3);
3960  }
3961 
3962  if (h->avctx->debug & FF_DEBUG_PICT_INFO) {
3964  "slice:%d %s mb:%d %c%s%s pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s %s\n",
3965  h->slice_num,
3966  (h->picture_structure == PICT_FRAME ? "F" : h->picture_structure == PICT_TOP_FIELD ? "T" : "B"),
3967  first_mb_in_slice,
3969  h->slice_type_fixed ? " fix" : "",
3970  h->nal_unit_type == NAL_IDR_SLICE ? " IDR" : "",
3971  pps_id, h->frame_num,
3972  h->cur_pic_ptr->field_poc[0],
3973  h->cur_pic_ptr->field_poc[1],
3974  h->ref_count[0], h->ref_count[1],
3975  h->qscale,
3976  h->deblocking_filter,
3978  h->use_weight,
3979  h->use_weight == 1 && h->use_weight_chroma ? "c" : "",
3980  h->slice_type == AV_PICTURE_TYPE_B ? (h->direct_spatial_mv_pred ? "SPAT" : "TEMP") : "");
3981  }
3982 
3983  return 0;
3984 }
3985 
3987 {
3988  switch (h->slice_type) {
3989  case AV_PICTURE_TYPE_P:
3990  return 0;
3991  case AV_PICTURE_TYPE_B:
3992  return 1;
3993  case AV_PICTURE_TYPE_I:
3994  return 2;
3995  case AV_PICTURE_TYPE_SP:
3996  return 3;
3997  case AV_PICTURE_TYPE_SI:
3998  return 4;
3999  default:
4000  return AVERROR_INVALIDDATA;
4001  }
4002 }
4003 
/*
 * fill_filter_caches_inter: populate the motion-vector cache
 * (h->mv_cache) and reference cache (h->ref_cache) for one list of an
 * inter macroblock, pulling neighbour data from the top and left
 * macroblocks, for use by the loop filter.
 * NOTE(review): this is a doxygen rendering; leading numbers are original
 * file line numbers and the first signature line (4004) is missing here.
 */
4005  int mb_type, int top_xy,
4006  int left_xy[LEFT_MBS],
4007  int top_type,
4008  int left_type[LEFT_MBS],
4009  int mb_xy, int list)
4010 {
4011  int b_stride = h->b_stride;
4012  int16_t(*mv_dst)[2] = &h->mv_cache[list][scan8[0]];
4013  int8_t *ref_cache = &h->ref_cache[list][scan8[0]];
4014  if (IS_INTER(mb_type) || IS_DIRECT(mb_type)) {
      /* Top neighbour row of the caches (offset -8 from scan8[0]). */
4015  if (USES_LIST(top_type, list)) {
4016  const int b_xy = h->mb2b_xy[top_xy] + 3 * b_stride;
4017  const int b8_xy = 4 * top_xy + 2;
4018  int (*ref2frm)[64] = h->ref2frm[h->slice_table[top_xy] & (MAX_SLICES - 1)][0] + (MB_MBAFF(h) ? 20 : 2);
4019  AV_COPY128(mv_dst - 1 * 8, h->cur_pic.motion_val[list][b_xy + 0]);
4020  ref_cache[0 - 1 * 8] =
4021  ref_cache[1 - 1 * 8] = ref2frm[list][h->cur_pic.ref_index[list][b8_xy + 0]];
4022  ref_cache[2 - 1 * 8] =
4023  ref_cache[3 - 1 * 8] = ref2frm[list][h->cur_pic.ref_index[list][b8_xy + 1]];
4024  } else {
      /* Top neighbour does not use this list: zero MVs, mark refs unused. */
4025  AV_ZERO128(mv_dst - 1 * 8);
4026  AV_WN32A(&ref_cache[0 - 1 * 8], ((LIST_NOT_USED) & 0xFF) * 0x01010101u);
4027  }
4028 
      /* Left neighbour column; only when the field/frame mode matches. */
4029  if (!IS_INTERLACED(mb_type ^ left_type[LTOP])) {
4030  if (USES_LIST(left_type[LTOP], list)) {
4031  const int b_xy = h->mb2b_xy[left_xy[LTOP]] + 3;
4032  const int b8_xy = 4 * left_xy[LTOP] + 1;
4033  int (*ref2frm)[64] = h->ref2frm[h->slice_table[left_xy[LTOP]] & (MAX_SLICES - 1)][0] + (MB_MBAFF(h) ? 20 : 2);
4034  AV_COPY32(mv_dst - 1 + 0, h->cur_pic.motion_val[list][b_xy + b_stride * 0]);
4035  AV_COPY32(mv_dst - 1 + 8, h->cur_pic.motion_val[list][b_xy + b_stride * 1]);
4036  AV_COPY32(mv_dst - 1 + 16, h->cur_pic.motion_val[list][b_xy + b_stride * 2]);
4037  AV_COPY32(mv_dst - 1 + 24, h->cur_pic.motion_val[list][b_xy + b_stride * 3]);
4038  ref_cache[-1 + 0] =
4039  ref_cache[-1 + 8] = ref2frm[list][h->cur_pic.ref_index[list][b8_xy + 2 * 0]];
4040  ref_cache[-1 + 16] =
4041  ref_cache[-1 + 24] = ref2frm[list][h->cur_pic.ref_index[list][b8_xy + 2 * 1]];
4042  } else {
4043  AV_ZERO32(mv_dst - 1 + 0);
4044  AV_ZERO32(mv_dst - 1 + 8);
4045  AV_ZERO32(mv_dst - 1 + 16);
4046  AV_ZERO32(mv_dst - 1 + 24);
4047  ref_cache[-1 + 0] =
4048  ref_cache[-1 + 8] =
4049  ref_cache[-1 + 16] =
4050  ref_cache[-1 + 24] = LIST_NOT_USED;
4051  }
4052  }
4053  }
4054 
      /* Current MB does not use this list: fill the whole 4x4 cache with
       * zero MVs and "unused" references, and we are done. */
4055  if (!USES_LIST(mb_type, list)) {
4056  fill_rectangle(mv_dst, 4, 4, 8, pack16to32(0, 0), 4);
4057  AV_WN32A(&ref_cache[0 * 8], ((LIST_NOT_USED) & 0xFF) * 0x01010101u);
4058  AV_WN32A(&ref_cache[1 * 8], ((LIST_NOT_USED) & 0xFF) * 0x01010101u);
4059  AV_WN32A(&ref_cache[2 * 8], ((LIST_NOT_USED) & 0xFF) * 0x01010101u);
4060  AV_WN32A(&ref_cache[3 * 8], ((LIST_NOT_USED) & 0xFF) * 0x01010101u);
4061  return;
4062  }
4063 
      /* Current MB: expand the four 8x8-block ref indices into the 4x4
       * ref cache (each packed pair duplicated via the 0x0101 multiply). */
4064  {
4065  int8_t *ref = &h->cur_pic.ref_index[list][4 * mb_xy];
4066  int (*ref2frm)[64] = h->ref2frm[h->slice_num & (MAX_SLICES - 1)][0] + (MB_MBAFF(h) ? 20 : 2);
4067  uint32_t ref01 = (pack16to32(ref2frm[list][ref[0]], ref2frm[list][ref[1]]) & 0x00FF00FF) * 0x0101;
4068  uint32_t ref23 = (pack16to32(ref2frm[list][ref[2]], ref2frm[list][ref[3]]) & 0x00FF00FF) * 0x0101;
4069  AV_WN32A(&ref_cache[0 * 8], ref01);
4070  AV_WN32A(&ref_cache[1 * 8], ref01);
4071  AV_WN32A(&ref_cache[2 * 8], ref23);
4072  AV_WN32A(&ref_cache[3 * 8], ref23);
4073  }
4074 
      /* Copy the MB's four rows of motion vectors into the cache. */
4075  {
4076  int16_t(*mv_src)[2] = &h->cur_pic.motion_val[list][4 * h->mb_x + 4 * h->mb_y * b_stride];
4077  AV_COPY128(mv_dst + 8 * 0, mv_src + 0 * b_stride);
4078  AV_COPY128(mv_dst + 8 * 1, mv_src + 1 * b_stride);
4079  AV_COPY128(mv_dst + 8 * 2, mv_src + 2 * b_stride);
4080  AV_COPY128(mv_dst + 8 * 3, mv_src + 3 * b_stride);
4081  }
4082 }
4083 
/*
 * fill_filter_caches: prepare all per-macroblock caches (neighbour types,
 * non-zero-count cache, MV/ref caches via fill_filter_caches_inter) needed
 * by the loop filter for the MB at h->mb_xy.
 * Returns 1 if filtering can be skipped entirely for this MB (QP below
 * threshold), 0 otherwise.
 * NOTE(review): doxygen rendering — leading numbers are original file line
 * numbers.
 */
4088 static int fill_filter_caches(H264Context *h, int mb_type)
4089 {
4090  const int mb_xy = h->mb_xy;
4091  int top_xy, left_xy[LEFT_MBS];
4092  int top_type, left_type[LEFT_MBS];
4093  uint8_t *nnz;
4094  uint8_t *nnz_cache;
4095 
4096  top_xy = mb_xy - (h->mb_stride << MB_FIELD(h));
4097 
4098  /* Wow, what a mess, why didn't they simplify the interlacing & intra
4099  * stuff, I can't imagine that these complex rules are worth it. */
4100 
     /* MBAFF neighbour adjustment: the effective top/left MB depends on the
      * field/frame flags of the current and neighbouring MB pairs. */
4101  left_xy[LBOT] = left_xy[LTOP] = mb_xy - 1;
4102  if (FRAME_MBAFF(h)) {
4103  const int left_mb_field_flag = IS_INTERLACED(h->cur_pic.mb_type[mb_xy - 1]);
4104  const int curr_mb_field_flag = IS_INTERLACED(mb_type);
4105  if (h->mb_y & 1) {
4106  if (left_mb_field_flag != curr_mb_field_flag)
4107  left_xy[LTOP] -= h->mb_stride;
4108  } else {
4109  if (curr_mb_field_flag)
4110  top_xy += h->mb_stride &
4111  (((h->cur_pic.mb_type[top_xy] >> 7) & 1) - 1);
4112  if (left_mb_field_flag != curr_mb_field_flag)
4113  left_xy[LBOT] += h->mb_stride;
4114  }
4115  }
4116 
4117  h->top_mb_xy = top_xy;
4118  h->left_mb_xy[LTOP] = left_xy[LTOP];
4119  h->left_mb_xy[LBOT] = left_xy[LBOT];
4120  {
4121  /* For sufficiently low qp, filtering wouldn't do anything.
4122  * This is a conservative estimate: could also check beta_offset
4123  * and more accurate chroma_qp. */
4124  int qp_thresh = h->qp_thresh; // FIXME strictly we should store qp_thresh for each mb of a slice
4125  int qp = h->cur_pic.qscale_table[mb_xy];
4126  if (qp <= qp_thresh &&
4127  (left_xy[LTOP] < 0 ||
4128  ((qp + h->cur_pic.qscale_table[left_xy[LTOP]] + 1) >> 1) <= qp_thresh) &&
4129  (top_xy < 0 ||
4130  ((qp + h->cur_pic.qscale_table[top_xy] + 1) >> 1) <= qp_thresh)) {
4131  if (!FRAME_MBAFF(h))
4132  return 1;
4133  if ((left_xy[LTOP] < 0 ||
4134  ((qp + h->cur_pic.qscale_table[left_xy[LBOT]] + 1) >> 1) <= qp_thresh) &&
4135  (top_xy < h->mb_stride ||
4136  ((qp + h->cur_pic.qscale_table[top_xy - h->mb_stride] + 1) >> 1) <= qp_thresh))
4137  return 1;
4138  }
4139  }
4140 
     /* Neighbour MB types; zero them out when the neighbour belongs to a
      * different slice (deblocking_filter == 2: don't filter across slices)
      * or is outside the picture (slice_table == 0xFFFF). */
4141  top_type = h->cur_pic.mb_type[top_xy];
4142  left_type[LTOP] = h->cur_pic.mb_type[left_xy[LTOP]];
4143  left_type[LBOT] = h->cur_pic.mb_type[left_xy[LBOT]];
4144  if (h->deblocking_filter == 2) {
4145  if (h->slice_table[top_xy] != h->slice_num)
4146  top_type = 0;
4147  if (h->slice_table[left_xy[LBOT]] != h->slice_num)
4148  left_type[LTOP] = left_type[LBOT] = 0;
4149  } else {
4150  if (h->slice_table[top_xy] == 0xFFFF)
4151  top_type = 0;
4152  if (h->slice_table[left_xy[LBOT]] == 0xFFFF)
4153  left_type[LTOP] = left_type[LBOT] = 0;
4154  }
4155  h->top_type = top_type;
4156  h->left_type[LTOP] = left_type[LTOP];
4157  h->left_type[LBOT] = left_type[LBOT];
4158 
     /* Intra MBs need no MV/ref caches for filtering. */
4159  if (IS_INTRA(mb_type))
4160  return 0;
4161 
4162  fill_filter_caches_inter(h, mb_type, top_xy, left_xy,
4163  top_type, left_type, mb_xy, 0);
4164  if (h->list_count == 2)
4165  fill_filter_caches_inter(h, mb_type, top_xy, left_xy,
4166  top_type, left_type, mb_xy, 1);
4167 
     /* Non-zero-count cache: current MB's 4x4 block of counts, then the
      * bordering row/column from the top and left neighbours. */
4168  nnz = h->non_zero_count[mb_xy];
4169  nnz_cache = h->non_zero_count_cache;
4170  AV_COPY32(&nnz_cache[4 + 8 * 1], &nnz[0]);
4171  AV_COPY32(&nnz_cache[4 + 8 * 2], &nnz[4]);
4172  AV_COPY32(&nnz_cache[4 + 8 * 3], &nnz[8]);
4173  AV_COPY32(&nnz_cache[4 + 8 * 4], &nnz[12]);
4174  h->cbp = h->cbp_table[mb_xy];
4175 
4176  if (top_type) {
4177  nnz = h->non_zero_count[top_xy];
4178  AV_COPY32(&nnz_cache[4 + 8 * 0], &nnz[3 * 4]);
4179  }
4180 
4181  if (left_type[LTOP]) {
4182  nnz = h->non_zero_count[left_xy[LTOP]];
4183  nnz_cache[3 + 8 * 1] = nnz[3 + 0 * 4];
4184  nnz_cache[3 + 8 * 2] = nnz[3 + 1 * 4];
4185  nnz_cache[3 + 8 * 3] = nnz[3 + 2 * 4];
4186  nnz_cache[3 + 8 * 4] = nnz[3 + 3 * 4];
4187  }
4188 
4189  /* CAVLC 8x8dct requires NNZ values for residual decoding that differ
4190  * from what the loop filter needs */
4191  if (!CABAC(h) && h->pps.transform_8x8_mode) {
4192  if (IS_8x8DCT(top_type)) {
4193  nnz_cache[4 + 8 * 0] =
4194  nnz_cache[5 + 8 * 0] = (h->cbp_table[top_xy] & 0x4000) >> 12;
4195  nnz_cache[6 + 8 * 0] =
4196  nnz_cache[7 + 8 * 0] = (h->cbp_table[top_xy] & 0x8000) >> 12;
4197  }
4198  if (IS_8x8DCT(left_type[LTOP])) {
4199  nnz_cache[3 + 8 * 1] =
4200  nnz_cache[3 + 8 * 2] = (h->cbp_table[left_xy[LTOP]] & 0x2000) >> 12; // FIXME check MBAFF
4201  }
4202  if (IS_8x8DCT(left_type[LBOT])) {
4203  nnz_cache[3 + 8 * 3] =
4204  nnz_cache[3 + 8 * 4] = (h->cbp_table[left_xy[LBOT]] & 0x8000) >> 12; // FIXME check MBAFF
4205  }
4206 
     /* For an 8x8-transform MB, each 8x8 quadrant's nnz comes from its
      * coded-block-pattern bit rather than the per-4x4 counts. */
4207  if (IS_8x8DCT(mb_type)) {
4208  nnz_cache[scan8[0]] =
4209  nnz_cache[scan8[1]] =
4210  nnz_cache[scan8[2]] =
4211  nnz_cache[scan8[3]] = (h->cbp & 0x1000) >> 12;
4212 
4213  nnz_cache[scan8[0 + 4]] =
4214  nnz_cache[scan8[1 + 4]] =
4215  nnz_cache[scan8[2 + 4]] =
4216  nnz_cache[scan8[3 + 4]] = (h->cbp & 0x2000) >> 12;
4217 
4218  nnz_cache[scan8[0 + 8]] =
4219  nnz_cache[scan8[1 + 8]] =
4220  nnz_cache[scan8[2 + 8]] =
4221  nnz_cache[scan8[3 + 8]] = (h->cbp & 0x4000) >> 12;
4222 
4223  nnz_cache[scan8[0 + 12]] =
4224  nnz_cache[scan8[1 + 12]] =
4225  nnz_cache[scan8[2 + 12]] =
4226  nnz_cache[scan8[3 + 12]] = (h->cbp & 0x8000) >> 12;
4227  }
4228  }
4229 
4230  return 0;
4231 }
4232 
/*
 * loop_filter: run the deblocking filter over the macroblock columns
 * [start_x, end_x) of the current row (and the paired row for MBAFF).
 * Temporarily clobbers per-MB state (mb_x/mb_y, slice_num, list_count,
 * chroma_qp, ...) and restores the parts the caller relies on at the end.
 * NOTE(review): doxygen rendering — leading numbers are original file line
 * numbers.
 */
4233 static void loop_filter(H264Context *h, int start_x, int end_x)
4234 {
4235  uint8_t *dest_y, *dest_cb, *dest_cr;
4236  int linesize, uvlinesize, mb_x, mb_y;
4237  const int end_mb_y = h->mb_y + FRAME_MBAFF(h);
4238  const int old_slice_type = h->slice_type;
4239  const int pixel_shift = h->pixel_shift;
4240  const int block_h = 16 >> h->chroma_y_shift;
4241 
4242  if (h->deblocking_filter) {
4243  for (mb_x = start_x; mb_x < end_x; mb_x++)
     /* For MBAFF, filter both rows of the MB pair (end_mb_y-1 and end_mb_y). */
4244  for (mb_y = end_mb_y - FRAME_MBAFF(h); mb_y <= end_mb_y; mb_y++) {
4245  int mb_xy, mb_type;
4246  mb_xy = h->mb_xy = mb_x + mb_y * h->mb_stride;
4247  h->slice_num = h->slice_table[mb_xy];
4248  mb_type = h->cur_pic.mb_type[mb_xy];
4249  h->list_count = h->list_counts[mb_xy];
4250 
4251  if (FRAME_MBAFF(h))
4252  h->mb_mbaff =
4253  h->mb_field_decoding_flag = !!IS_INTERLACED(mb_type);
4254 
     /* Compute the luma/chroma destination pointers for this MB. */
4255  h->mb_x = mb_x;
4256  h->mb_y = mb_y;
4257  dest_y = h->cur_pic.f.data[0] +
4258  ((mb_x << pixel_shift) + mb_y * h->linesize) * 16;
4259  dest_cb = h->cur_pic.f.data[1] +
4260  (mb_x << pixel_shift) * (8 << CHROMA444(h)) +
4261  mb_y * h->uvlinesize * block_h;
4262  dest_cr = h->cur_pic.f.data[2] +
4263  (mb_x << pixel_shift) * (8 << CHROMA444(h)) +
4264  mb_y * h->uvlinesize * block_h;
4265  // FIXME simplify above
4266 
     /* Field MBs use doubled line sizes; odd rows start one line up. */
4267  if (MB_FIELD(h)) {
4268  linesize = h->mb_linesize = h->linesize * 2;
4269  uvlinesize = h->mb_uvlinesize = h->uvlinesize * 2;
4270  if (mb_y & 1) { // FIXME move out of this function?
4271  dest_y -= h->linesize * 15;
4272  dest_cb -= h->uvlinesize * (block_h - 1);
4273  dest_cr -= h->uvlinesize * (block_h - 1);
4274  }
4275  } else {
4276  linesize = h->mb_linesize = h->linesize;
4277  uvlinesize = h->mb_uvlinesize = h->uvlinesize;
4278  }
4279  backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize,
4280  uvlinesize, 0);
     /* fill_filter_caches() returning nonzero means filtering is a no-op
      * for this MB (QP below threshold) — skip it. */
4281  if (fill_filter_caches(h, mb_type))
4282  continue;
4283  h->chroma_qp[0] = get_chroma_qp(h, 0, h->cur_pic.qscale_table[mb_xy]);
4284  h->chroma_qp[1] = get_chroma_qp(h, 1, h->cur_pic.qscale_table[mb_xy]);
4285 
4286  if (FRAME_MBAFF(h)) {
4287  ff_h264_filter_mb(h, mb_x, mb_y, dest_y, dest_cb, dest_cr,
4288  linesize, uvlinesize);
4289  } else {
4290  ff_h264_filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb,
4291  dest_cr, linesize, uvlinesize);
4292  }
4293  }
4294  }
     /* Restore the state the filtering loop clobbered. */
4295  h->slice_type = old_slice_type;
4296  h->mb_x = end_x;
4297  h->mb_y = end_mb_y - FRAME_MBAFF(h);
4298  h->chroma_qp[0] = get_chroma_qp(h, 0, h->qscale);
4299  h->chroma_qp[1] = get_chroma_qp(h, 1, h->qscale);
4300 }
4301 
/* NOTE(review): the signature line (doc line 4302) was lost in this
 * extraction. Judging by the body, this predicts the field decoding flag
 * (h->mb_field_decoding_flag) for the current MB from the left neighbour
 * if it is in the same slice, otherwise from the top neighbour — confirm
 * against the complete source. */
4303 {
4304  const int mb_xy = h->mb_x + h->mb_y * h->mb_stride;
 /* prefer the left neighbour's mb_type; fall back to the top one; 0 if
  * neither belongs to this slice */
4305  int mb_type = (h->slice_table[mb_xy - 1] == h->slice_num) ?
4306  h->cur_pic.mb_type[mb_xy - 1] :
4307  (h->slice_table[mb_xy - h->mb_stride] == h->slice_num) ?
4308  h->cur_pic.mb_type[mb_xy - h->mb_stride] : 0;
4309  h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
4310 }
4311 
/* NOTE(review): the signature line (doc line 4315) was lost in this
 * extraction; decode_slice() calls decode_finish_row(h), which matches
 * this body. Finishes a decoded MB row: emits the horizontal band and
 * reports row progress for frame-threaded decoding. */
4316 {
4317  int top = 16 * (h->mb_y >> FIELD_PICTURE(h));
4318  int pic_height = 16 * h->mb_height >> FIELD_PICTURE(h);
 /* an MBAFF row pair is 32 luma lines high */
4319  int height = 16 << FRAME_MBAFF(h);
4320  int deblock_border = (16 + 4) << FRAME_MBAFF(h);
4321 
4322  if (h->deblocking_filter) {
 /* the deblocking filter still needs lines above this row, so hold
  * back a border until the final row */
4323  if ((top + height) >= pic_height)
4324  height += deblock_border;
4325  top -= deblock_border;
4326  }
4327 
 /* nothing of the picture is finished yet */
4328  if (top >= pic_height || (top + height) < 0)
4329  return;
4330 
 /* clip the band to the picture */
4331  height = FFMIN(height, pic_height - top);
4332  if (top < 0) {
4333  height = top + height;
4334  top = 0;
4335  }
4336 
4337  ff_h264_draw_horiz_band(h, top, height);
4338 
4339  if (h->droppable)
4340  return;
4341 
 /* NOTE(review): the second argument line of this call (doc line 4343)
  * is missing from this extraction. */
4342  ff_thread_report_progress(&h->cur_pic_ptr->tf, top + height - 1,
4344 }
4345 
4346 static void er_add_slice(H264Context *h, int startx, int starty,
4347  int endx, int endy, int status)
4348 {
4349 #if CONFIG_ERROR_RESILIENCE
4350  ERContext *er = &h->er;
4351 
4352  er->ref_count = h->ref_count[0];
4353  ff_er_add_slice(er, startx, starty, endx, endy, status);
4354 #endif
4355 }
4356 
/* Decode every macroblock of one slice; worker entry point handed to
 * avctx->execute (arg points at an H264Context pointer). Returns 0 on a
 * normal slice end, a negative error code on bitstream errors.
 * NOTE(review): this listing was extracted from documentation and doc
 * lines 4364, 4373, 4377, 4386-4387, 4395, 4409, 4412, 4426, 4445,
 * 4453, 4458, 4473, 4480, 4486 and 4500 were dropped, so several
 * statements (e.g. the CABAC decoder init and the per-MB hl-decode
 * calls) do not appear below — consult the full source. */
4357 static int decode_slice(struct AVCodecContext *avctx, void *arg)
4358 {
4359  H264Context *h = *(void **)arg;
4360  int lf_x_start = h->mb_x;
4361 
4362  h->mb_skip_run = -1;
4363 
4365  avctx->codec_id != AV_CODEC_ID_H264 ||
4366  (CONFIG_GRAY && (h->flags & CODEC_FLAG_GRAY));
4367 
 /* CABAC entropy-coded slice */
4368  if (h->pps.cabac) {
4369  /* realign */
4370  align_get_bits(&h->gb);
4371 
4372  /* init cabac */
4374  h->gb.buffer + get_bits_count(&h->gb) / 8,
4375  (get_bits_left(&h->gb) + 7) / 8);
4376 
4378 
4379  for (;;) {
4380  // START_TIMER
4381  int ret = ff_h264_decode_mb_cabac(h);
4382  int eos;
4383  // STOP_TIMER("decode_mb_cabac")
4384 
4385  if (ret >= 0)
4387 
4388  // FIXME optimal? or let mb_decode decode 16x32 ?
 /* with MBAFF, also decode the bottom MB of the pair */
4389  if (ret >= 0 && FRAME_MBAFF(h)) {
4390  h->mb_y++;
4391 
4392  ret = ff_h264_decode_mb_cabac(h);
4393 
4394  if (ret >= 0)
4396  h->mb_y--;
4397  }
4398  eos = get_cabac_terminate(&h->cabac);
4399 
 /* tolerate streams with truncated CABAC payloads */
4400  if ((h->workaround_bugs & FF_BUG_TRUNCATED) &&
4401  h->cabac.bytestream > h->cabac.bytestream_end + 2) {
4402  er_add_slice(h, h->resync_mb_x, h->resync_mb_y, h->mb_x - 1,
4403  h->mb_y, ER_MB_END);
4404  if (h->mb_x >= lf_x_start)
4405  loop_filter(h, lf_x_start, h->mb_x + 1);
4406  return 0;
4407  }
4408  if (ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
4410  "error while decoding MB %d %d, bytestream %td\n",
4411  h->mb_x, h->mb_y,
4413  er_add_slice(h, h->resync_mb_x, h->resync_mb_y, h->mb_x,
4414  h->mb_y, ER_MB_ERROR);
4415  return AVERROR_INVALIDDATA;
4416  }
4417 
 /* advance; at the right edge filter the finished row and wrap */
4418  if (++h->mb_x >= h->mb_width) {
4419  loop_filter(h, lf_x_start, h->mb_x);
4420  h->mb_x = lf_x_start = 0;
4421  decode_finish_row(h);
4422  ++h->mb_y;
4423  if (FIELD_OR_MBAFF_PICTURE(h)) {
4424  ++h->mb_y;
4425  if (FRAME_MBAFF(h) && h->mb_y < h->mb_height)
4427  }
4428  }
4429 
4430  if (eos || h->mb_y >= h->mb_height) {
4431  tprintf(h->avctx, "slice end %d %d\n",
4432  get_bits_count(&h->gb), h->gb.size_in_bits);
4433  er_add_slice(h, h->resync_mb_x, h->resync_mb_y, h->mb_x - 1,
4434  h->mb_y, ER_MB_END);
4435  if (h->mb_x > lf_x_start)
4436  loop_filter(h, lf_x_start, h->mb_x);
4437  return 0;
4438  }
4439  }
 /* CAVLC entropy-coded slice */
4440  } else {
4441  for (;;) {
4442  int ret = ff_h264_decode_mb_cavlc(h);
4443 
4444  if (ret >= 0)
4446 
4447  // FIXME optimal? or let mb_decode decode 16x32 ?
4448  if (ret >= 0 && FRAME_MBAFF(h)) {
4449  h->mb_y++;
4450  ret = ff_h264_decode_mb_cavlc(h);
4451 
4452  if (ret >= 0)
4454  h->mb_y--;
4455  }
4456 
4457  if (ret < 0) {
4459  "error while decoding MB %d %d\n", h->mb_x, h->mb_y);
4460  er_add_slice(h, h->resync_mb_x, h->resync_mb_y, h->mb_x,
4461  h->mb_y, ER_MB_ERROR);
4462  return ret;
4463  }
4464 
4465  if (++h->mb_x >= h->mb_width) {
4466  loop_filter(h, lf_x_start, h->mb_x);
4467  h->mb_x = lf_x_start = 0;
4468  decode_finish_row(h);
4469  ++h->mb_y;
4470  if (FIELD_OR_MBAFF_PICTURE(h)) {
4471  ++h->mb_y;
4472  if (FRAME_MBAFF(h) && h->mb_y < h->mb_height)
4474  }
4475  if (h->mb_y >= h->mb_height) {
4476  tprintf(h->avctx, "slice end %d %d\n",
4477  get_bits_count(&h->gb), h->gb.size_in_bits);
4478 
 /* the bitstream must be fully consumed at the picture end */
4479  if (get_bits_left(&h->gb) == 0) {
4481  h->mb_x - 1, h->mb_y,
4482  ER_MB_END);
4483 
4484  return 0;
4485  } else {
4487  h->mb_x - 1, h->mb_y,
4488  ER_MB_END);
4489 
4490  return AVERROR_INVALIDDATA;
4491  }
4492  }
4493  }
4494 
 /* out of bits mid-picture: clean end only if exactly consumed */
4495  if (get_bits_left(&h->gb) <= 0 && h->mb_skip_run <= 0) {
4496  tprintf(h->avctx, "slice end %d %d\n",
4497  get_bits_count(&h->gb), h->gb.size_in_bits);
4498 
4499  if (get_bits_left(&h->gb) == 0) {
4501  h->mb_x - 1, h->mb_y,
4502  ER_MB_END);
4503  if (h->mb_x > lf_x_start)
4504  loop_filter(h, lf_x_start, h->mb_x);
4505 
4506  return 0;
4507  } else {
4508  er_add_slice(h, h->resync_mb_x, h->resync_mb_y, h->mb_x,
4509  h->mb_y, ER_MB_ERROR);
4510 
4511  return AVERROR_INVALIDDATA;
4512  }
4513  }
4514  }
4515  }
4516 }
4517 
/* Execute the buffered slice contexts: directly for a single context,
 * via avctx->execute for several, then pull position/droppable state
 * from the last worker back into the master context.
 * NOTE(review): doc lines 4531, 4554 and 4556 were dropped by this
 * extraction (the error-log text and the per-thread error_count
 * accumulation loop body) — consult the full source. */
4524 static int execute_decode_slices(H264Context *h, unsigned context_count)
4525 {
4526  AVCodecContext *const avctx = h->avctx;
4527  H264Context *hx;
4528  int i;
4529 
 /* reject streams claiming more MB rows than the frame holds */
4530  if (h->mb_y >= h->mb_height) {
4532  "Input contains more MB rows than the frame height.\n");
4533  return AVERROR_INVALIDDATA;
4534  }
4535 
 /* hardware acceleration decodes the slices elsewhere */
4536  if (h->avctx->hwaccel)
4537  return 0;
4538  if (context_count == 1) {
4539  return decode_slice(avctx, &h);
4540  } else {
4541  for (i = 1; i < context_count; i++) {
4542  hx = h->thread_context[i];
4543  hx->er.error_count = 0;
4544  }
4545 
4546  avctx->execute(avctx, decode_slice, h->thread_context,
4547  NULL, context_count, sizeof(void *));
4548 
4549  /* pull back stuff from slices to master context */
4550  hx = h->thread_context[context_count - 1];
4551  h->mb_x = hx->mb_x;
4552  h->mb_y = hx->mb_y;
4553  h->droppable = hx->droppable;
4555  for (i = 1; i < context_count; i++)
4557  }
4558 
4559  return 0;
4560 }
4561 
/* Split the input buffer into NAL units (Annex-B start codes or AVC
 * length prefixes), dispatch each by nal_unit_type, and run buffered
 * slice contexts. Two passes when frame threading is active: pass 0
 * only scans for how many NALs are needed before the next thread may
 * start. Returns the consumed byte count or a negative error.
 * NOTE(review): this listing was extracted from documentation; numerous
 * doc lines (4575, 4603, 4639, 4649, 4654, 4691, 4695, 4699, 4705,
 * 4721, 4737-4738, 4753, 4755, 4769, 4803, 4805, 4815, 4817, 4821,
 * 4873-4874) were dropped, so several statements and conditions do not
 * appear below — consult the full source. */
4562 static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size,
4563  int parse_extradata)
4564 {
4565  AVCodecContext *const avctx = h->avctx;
4566  H264Context *hx;
4567  int buf_index;
4568  unsigned context_count;
4569  int next_avc;
 /* frame threading needs the extra pre-scan pass (pass 0) */
4570  int pass = !(avctx->active_thread_type & FF_THREAD_FRAME);
4571  int nals_needed = 0;
4572  int nal_index;
4573  int ret = 0;
4574 
4576  if (!(avctx->flags2 & CODEC_FLAG2_CHUNKS)) {
4577  h->current_slice = 0;
4578  if (!h->first_field)
4579  h->cur_pic_ptr = NULL;
4580  ff_h264_reset_sei(h);
4581  }
4582 
4583  for (; pass <= 1; pass++) {
4584  buf_index = 0;
4585  context_count = 0;
4586  next_avc = h->is_avc ? 0 : buf_size;
4587  nal_index = 0;
4588  for (;;) {
4589  int consumed;
4590  int dst_length;
4591  int bit_length;
4592  const uint8_t *ptr;
4593  int i, nalsize = 0;
4594  int err;
4595 
 /* AVC (length-prefixed) framing */
4596  if (buf_index >= next_avc) {
4597  if (buf_index >= buf_size - h->nal_length_size)
4598  break;
4599  nalsize = 0;
4600  for (i = 0; i < h->nal_length_size; i++)
4601  nalsize = (nalsize << 8) | buf[buf_index++];
4602  if (nalsize <= 0 || nalsize > buf_size - buf_index) {
4604  "AVC: nal size %d\n", nalsize);
4605  break;
4606  }
4607  next_avc = buf_index + nalsize;
4608  } else {
4609  // start code prefix search
4610  for (; buf_index + 3 < next_avc; buf_index++)
4611  // This should always succeed in the first iteration.
4612  if (buf[buf_index] == 0 &&
4613  buf[buf_index + 1] == 0 &&
4614  buf[buf_index + 2] == 1)
4615  break;
4616 
4617  if (buf_index + 3 >= buf_size) {
4618  buf_index = buf_size;
4619  break;
4620  }
4621 
4622  buf_index += 3;
4623  if (buf_index >= next_avc)
4624  continue;
4625  }
4626 
4627  hx = h->thread_context[context_count];
4628 
 /* unescape the NAL (remove emulation-prevention bytes) */
4629  ptr = ff_h264_decode_nal(hx, buf + buf_index, &dst_length,
4630  &consumed, next_avc - buf_index);
4631  if (ptr == NULL || dst_length < 0) {
4632  ret = -1;
4633  goto end;
4634  }
4635  i = buf_index + consumed;
4636  if ((h->workaround_bugs & FF_BUG_AUTODETECT) && i + 3 < next_avc &&
4637  buf[i] == 0x00 && buf[i + 1] == 0x00 &&
4638  buf[i + 2] == 0x01 && buf[i + 3] == 0xE0)
4640 
4641  if (!(h->workaround_bugs & FF_BUG_TRUNCATED))
4642  while (dst_length > 0 && ptr[dst_length - 1] == 0)
4643  dst_length--;
4644  bit_length = !dst_length ? 0
4645  : (8 * dst_length -
4646  decode_rbsp_trailing(h, ptr + dst_length - 1));
4647 
4648  if (h->avctx->debug & FF_DEBUG_STARTCODE)
4650  "NAL %d at %d/%d length %d\n",
4651  hx->nal_unit_type, buf_index, buf_size, dst_length);
4652 
4653  if (h->is_avc && (nalsize != consumed) && nalsize)
4655  "AVC: Consumed only %d bytes instead of %d\n",
4656  consumed, nalsize);
4657 
4658  buf_index += consumed;
4659  nal_index++;
4660 
4661  if (pass == 0) {
4662  /* packets can sometimes contain multiple PPS/SPS,
4663  * e.g. two PAFF field pictures in one packet, or a demuxer
4664  * which splits NALs strangely if so, when frame threading we
4665  * can't start the next thread until we've read all of them */
4666  switch (hx->nal_unit_type) {
4667  case NAL_SPS:
4668  case NAL_PPS:
4669  nals_needed = nal_index;
4670  break;
4671  case NAL_DPA:
4672  case NAL_IDR_SLICE:
4673  case NAL_SLICE:
4674  init_get_bits(&hx->gb, ptr, bit_length);
4675  if (!get_ue_golomb(&hx->gb))
4676  nals_needed = nal_index;
4677  }
4678  continue;
4679  }
4680 
 /* skip non-reference NALs when frames are being discarded */
4681  if (avctx->skip_frame >= AVDISCARD_NONREF &&
4682  h->nal_ref_idc == 0 &&
4683  h->nal_unit_type != NAL_SEI)
4684  continue;
4685 
4686 again:
4687  /* Ignore every NAL unit type except PPS and SPS during extradata
4688  * parsing. Decoding slices is not possible in codec init
4689  * with frame-mt */
4690  if (parse_extradata && HAVE_THREADS &&
4692  (hx->nal_unit_type != NAL_PPS &&
4693  hx->nal_unit_type != NAL_SPS)) {
4694  if (hx->nal_unit_type < NAL_AUD ||
4696  av_log(avctx, AV_LOG_INFO,
4697  "Ignoring NAL unit %d during extradata parsing\n",
4698  hx->nal_unit_type);
4700  }
4701  err = 0;
4702  switch (hx->nal_unit_type) {
4703  case NAL_IDR_SLICE:
4704  if (h->nal_unit_type != NAL_IDR_SLICE) {
4706  "Invalid mix of idr and non-idr slices\n");
4707  ret = -1;
4708  goto end;
4709  }
4710  idr(h); // FIXME ensure we don't lose some frames if there is reordering
 /* fallthrough: an IDR slice is also decoded as a slice */
4711  case NAL_SLICE:
4712  init_get_bits(&hx->gb, ptr, bit_length);
4713  hx->intra_gb_ptr =
4714  hx->inter_gb_ptr = &hx->gb;
4715  hx->data_partitioning = 0;
4716 
4717  if ((err = decode_slice_header(hx, h)))
4718  break;
4719 
4720  if (h->sei_recovery_frame_cnt >= 0 && h->recovery_frame < 0) {
4722  ((1 << h->sps.log2_max_frame_num) - 1);
4723  }
4724 
4725  h->cur_pic_ptr->f.key_frame |=
4726  (hx->nal_unit_type == NAL_IDR_SLICE) ||
4727  (h->sei_recovery_frame_cnt >= 0);
4728 
4729  if (hx->nal_unit_type == NAL_IDR_SLICE ||
4730  h->recovery_frame == h->frame_num) {
4731  h->recovery_frame = -1;
4732  h->cur_pic_ptr->recovered = 1;
4733  }
4734  // If we have an IDR, all frames after it in decoded order are
4735  // "recovered".
4736  if (hx->nal_unit_type == NAL_IDR_SLICE)
4739 
4740  if (h->current_slice == 1) {
4741  if (!(avctx->flags2 & CODEC_FLAG2_CHUNKS))
4742  decode_postinit(h, nal_index >= nals_needed);
4743 
4744  if (h->avctx->hwaccel &&
4745  (ret = h->avctx->hwaccel->start_frame(h->avctx, NULL, 0)) < 0)
4746  return ret;
4747  }
4748 
4749  if (hx->redundant_pic_count == 0 &&
4750  (avctx->skip_frame < AVDISCARD_NONREF ||
4751  hx->nal_ref_idc) &&
4752  (avctx->skip_frame < AVDISCARD_BIDIR ||
4754  (avctx->skip_frame < AVDISCARD_NONKEY ||
4756  avctx->skip_frame < AVDISCARD_ALL) {
4757  if (avctx->hwaccel) {
4758  ret = avctx->hwaccel->decode_slice(avctx,
4759  &buf[buf_index - consumed],
4760  consumed);
4761  if (ret < 0)
4762  return ret;
4763  } else
4764  context_count++;
4765  }
4766  break;
4767  case NAL_DPA:
4768  if (h->avctx->flags & CODEC_FLAG2_CHUNKS) {
4770  "Decoding in chunks is not supported for "
4771  "partitioned slices.\n");
4772  return AVERROR(ENOSYS);
4773  }
4774 
4775  init_get_bits(&hx->gb, ptr, bit_length);
4776  hx->intra_gb_ptr =
4777  hx->inter_gb_ptr = NULL;
4778 
4779  if ((err = decode_slice_header(hx, h)) < 0) {
4780  /* make sure data_partitioning is cleared if it was set
4781  * before, so we don't try decoding a slice without a valid
4782  * slice header later */
4783  h->data_partitioning = 0;
4784  break;
4785  }
4786 
4787  hx->data_partitioning = 1;
4788  break;
4789  case NAL_DPB:
4790  init_get_bits(&hx->intra_gb, ptr, bit_length);
4791  hx->intra_gb_ptr = &hx->intra_gb;
4792  break;
4793  case NAL_DPC:
4794  init_get_bits(&hx->inter_gb, ptr, bit_length);
4795  hx->inter_gb_ptr = &hx->inter_gb;
4796 
4797  if (hx->redundant_pic_count == 0 &&
4798  hx->intra_gb_ptr &&
4799  hx->data_partitioning &&
4800  h->cur_pic_ptr && h->context_initialized &&
4801  (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc) &&
4802  (avctx->skip_frame < AVDISCARD_BIDIR ||
4804  (avctx->skip_frame < AVDISCARD_NONKEY ||
4806  avctx->skip_frame < AVDISCARD_ALL)
4807  context_count++;
4808  break;
4809  case NAL_SEI:
4810  init_get_bits(&h->gb, ptr, bit_length);
4811  ff_h264_decode_sei(h);
4812  break;
4813  case NAL_SPS:
4814  init_get_bits(&h->gb, ptr, bit_length);
4816  if (ret < 0 && h->is_avc && (nalsize != consumed) && nalsize) {
4818  "SPS decoding failure, trying again with the complete NAL\n");
4819  init_get_bits(&h->gb, buf + buf_index + 1 - consumed,
4820  8 * (nalsize - 1));
4822  }
4823 
4824  ret = h264_set_parameter_from_sps(h);
4825  if (ret < 0)
4826  goto end;
4827 
4828  break;
4829  case NAL_PPS:
4830  init_get_bits(&h->gb, ptr, bit_length);
4831  ff_h264_decode_picture_parameter_set(h, bit_length);
4832  break;
4833  case NAL_AUD:
4834  case NAL_END_SEQUENCE:
4835  case NAL_END_STREAM:
4836  case NAL_FILLER_DATA:
4837  case NAL_SPS_EXT:
4838  case NAL_AUXILIARY_SLICE:
4839  break;
4840  case NAL_FF_IGNORE:
4841  break;
4842  default:
4843  av_log(avctx, AV_LOG_DEBUG, "Unknown NAL code: %d (%d bits)\n",
4844  hx->nal_unit_type, bit_length);
4845  }
4846 
 /* all worker contexts filled: flush the batch of slices */
4847  if (context_count == h->max_contexts) {
4848  execute_decode_slices(h, context_count);
4849  context_count = 0;
4850  }
4851 
4852  if (err < 0) {
4853  av_log(h->avctx, AV_LOG_ERROR, "decode_slice_header error\n");
4854  h->ref_count[0] = h->ref_count[1] = h->list_count = 0;
4855  } else if (err == 1) {
4856  /* Slice could not be decoded in parallel mode, copy down
4857  * NAL unit stuff to context 0 and restart. Note that
4858  * rbsp_buffer is not transferred, but since we no longer
4859  * run in parallel mode this should not be an issue. */
4860  h->nal_unit_type = hx->nal_unit_type;
4861  h->nal_ref_idc = hx->nal_ref_idc;
4862  hx = h;
4863  goto again;
4864  }
4865  }
4866  }
4867  if (context_count)
4868  execute_decode_slices(h, context_count);
4869 
4870 end:
4871  /* clean up */
4872  if (h->cur_pic_ptr && !h->droppable) {
4875  }
4876 
4877  return (ret < 0) ? ret : buf_index;
4878 }
4879 
/* Clamp the reported number of consumed bytes to a sane value:
 * never report zero progress (would loop forever), and treat anything
 * within 10 bytes of the end as a fully consumed buffer. */
static int get_consumed_bytes(int pos, int buf_size)
{
    int consumed = pos ? pos : 1; // avoid infinite loops (i doubt that is needed but ...)

    if (consumed > buf_size - 10)
        consumed = buf_size; // oops ;)

    return consumed;
}
4892 
4893 static int output_frame(H264Context *h, AVFrame *dst, AVFrame *src)
4894 {
4895  int i;
4896  int ret = av_frame_ref(dst, src);
4897  if (ret < 0)
4898  return ret;
4899 
4900  if (!h->sps.crop)
4901  return 0;
4902 
4903  for (i = 0; i < 3; i++) {
4904  int hshift = (i > 0) ? h->chroma_x_shift : 0;
4905  int vshift = (i > 0) ? h->chroma_y_shift : 0;
4906  int off = ((h->sps.crop_left >> hshift) << h->pixel_shift) +
4907  (h->sps.crop_top >> vshift) * dst->linesize[i];
4908  dst->data[i] += off;
4909  }
4910  return 0;
4911 }
4912 
/* Top-level AVCodec decode callback: feed one packet through
 * decode_nal_units(), flush delayed pictures on an empty (EOF) packet,
 * and output the next picture in display order when one is ready.
 * NOTE(review): doc line 4989 (the statement executed when the picture
 * being output is not yet recovered) was dropped by this extraction —
 * consult the full source. */
4913 static int h264_decode_frame(AVCodecContext *avctx, void *data,
4914  int *got_frame, AVPacket *avpkt)
4915 {
4916  const uint8_t *buf = avpkt->data;
4917  int buf_size = avpkt->size;
4918  H264Context *h = avctx->priv_data;
4919  AVFrame *pict = data;
4920  int buf_index = 0;
4921  int ret;
4922 
4923  h->flags = avctx->flags;
4924  /* reset data partitioning here, to ensure GetBitContexts from previous
4925  * packets do not get used. */
4926  h->data_partitioning = 0;
4927 
4928  /* end of stream, output what is still in the buffers */
4929 out:
4930  if (buf_size == 0) {
4931  Picture *out;
4932  int i, out_idx;
4933 
4934  h->cur_pic_ptr = NULL;
4935 
4936  // FIXME factorize this with the output code below
 /* pick the delayed picture with the lowest POC up to the next
  * keyframe/mmco reset */
4937  out = h->delayed_pic[0];
4938  out_idx = 0;
4939  for (i = 1;
4940  h->delayed_pic[i] &&
4941  !h->delayed_pic[i]->f.key_frame &&
4942  !h->delayed_pic[i]->mmco_reset;
4943  i++)
4944  if (h->delayed_pic[i]->poc < out->poc) {
4945  out = h->delayed_pic[i];
4946  out_idx = i;
4947  }
4948 
 /* compact the delayed-picture list over the output slot */
4949  for (i = out_idx; h->delayed_pic[i]; i++)
4950  h->delayed_pic[i] = h->delayed_pic[i + 1];
4951 
4952  if (out) {
4953  ret = output_frame(h, pict, &out->f);
4954  if (ret < 0)
4955  return ret;
4956  *got_frame = 1;
4957  }
4958 
4959  return buf_index;
4960  }
4961 
4962  buf_index = decode_nal_units(h, buf, buf_size, 0);
4963  if (buf_index < 0)
4964  return AVERROR_INVALIDDATA;
4965 
 /* end-of-sequence NAL: re-enter the flush path above */
4966  if (!h->cur_pic_ptr && h->nal_unit_type == NAL_END_SEQUENCE) {
4967  buf_size = 0;
4968  goto out;
4969  }
4970 
4971  if (!(avctx->flags2 & CODEC_FLAG2_CHUNKS) && !h->cur_pic_ptr) {
4972  if (avctx->skip_frame >= AVDISCARD_NONREF)
4973  return 0;
4974  av_log(avctx, AV_LOG_ERROR, "no frame!\n");
4975  return AVERROR_INVALIDDATA;
4976  }
4977 
4978  if (!(avctx->flags2 & CODEC_FLAG2_CHUNKS) ||
4979  (h->mb_y >= h->mb_height && h->mb_height)) {
4980  if (avctx->flags2 & CODEC_FLAG2_CHUNKS)
4981  decode_postinit(h, 1);
4982 
4983  field_end(h, 0);
4984 
4985  *got_frame = 0;
 /* output only recovered pictures unless the caller asked for
  * corrupt output too */
4986  if (h->next_output_pic && ((avctx->flags & CODEC_FLAG_OUTPUT_CORRUPT) ||
4987  h->next_output_pic->recovered)) {
4988  if (!h->next_output_pic->recovered)
4990 
4991  ret = output_frame(h, pict, &h->next_output_pic->f);
4992  if (ret < 0)
4993  return ret;
4994  *got_frame = 1;
4995  }
4996  }
4997 
4998  assert(pict->buf[0] || !*got_frame);
4999 
5000  return get_consumed_bytes(buf_index, buf_size);
5001 }
5002 
/* NOTE(review): the signature line (doc line 5003) was lost in this
 * extraction. The body frees the decoder's tables and every stored
 * SPS/PPS parameter set — presumably the context teardown helper;
 * confirm the exact name against the full source. */
5004 {
5005  int i;
5006 
5007  free_tables(h, 1); // FIXME cleanup init stuff perhaps
5008 
 /* release all cached sequence parameter sets */
5009  for (i = 0; i < MAX_SPS_COUNT; i++)
5010  av_freep(h->sps_buffers + i);
5011 
 /* release all cached picture parameter sets */
5012  for (i = 0; i < MAX_PPS_COUNT; i++)
5013  av_freep(h->pps_buffers + i);
5014 }
5015 
/* NOTE(review): the signature line (doc line 5016) and the statement at
 * doc line 5020 were lost in this extraction. The visible body releases
 * the current picture reference and returns 0 — presumably the codec
 * close callback; confirm against the full source. */
5017 {
5018  H264Context *h = avctx->priv_data;
5019 
5021 
5022  unref_picture(h, &h->cur_pic);
5023 
5024  return 0;
5025 }
5026 
/* Human-readable names for the H.264 profiles the decoder can report;
 * terminated by the FF_PROFILE_UNKNOWN sentinel entry. */
5027 static const AVProfile profiles[] = {
5028  { FF_PROFILE_H264_BASELINE, "Baseline" },
5029  { FF_PROFILE_H264_CONSTRAINED_BASELINE, "Constrained Baseline" },
5030  { FF_PROFILE_H264_MAIN, "Main" },
5031  { FF_PROFILE_H264_EXTENDED, "Extended" },
5032  { FF_PROFILE_H264_HIGH, "High" },
5033  { FF_PROFILE_H264_HIGH_10, "High 10" },
5034  { FF_PROFILE_H264_HIGH_10_INTRA, "High 10 Intra" },
5035  { FF_PROFILE_H264_HIGH_422, "High 4:2:2" },
5036  { FF_PROFILE_H264_HIGH_422_INTRA, "High 4:2:2 Intra" },
5037  { FF_PROFILE_H264_HIGH_444, "High 4:4:4" },
5038  { FF_PROFILE_H264_HIGH_444_PREDICTIVE, "High 4:4:4 Predictive" },
5039  { FF_PROFILE_H264_HIGH_444_INTRA, "High 4:4:4 Intra" },
5040  { FF_PROFILE_H264_CAVLC_444, "CAVLC 4:4:4" },
5041  { FF_PROFILE_UNKNOWN },
5042 };
5043 
/* Public codec registration entry for the H.264 decoder.
 * NOTE(review): the struct's opening line (doc line 5044) and several
 * field lines (5050-5052 — presumably the init/close/decode callbacks —
 * and 5054-5055, the rest of the capabilities flags) were lost in this
 * extraction; consult the full source. */
5045  .name = "h264",
5046  .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),
5047  .type = AVMEDIA_TYPE_VIDEO,
5048  .id = AV_CODEC_ID_H264,
5049  .priv_data_size = sizeof(H264Context),
5053  .capabilities = /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 |
5056  .flush = flush_dpb,
5057  .init_thread_copy = ONLY_IF_THREADS_ENABLED(decode_init_thread_copy),
5058  .update_thread_context = ONLY_IF_THREADS_ENABLED(decode_update_thread_context),
5059  .profiles = NULL_IF_CONFIG_SMALL(profiles),
5060 };