39 { 36, 68, 60, 92, 34, 66, 58, 90, },
40 { 100, 4, 124, 28, 98, 2, 122, 26, },
41 { 52, 84, 44, 76, 50, 82, 42, 74, },
42 { 116, 20, 108, 12, 114, 18, 106, 10, },
43 { 32, 64, 56, 88, 38, 70, 62, 94, },
44 { 96, 0, 120, 24, 102, 6, 126, 30, },
45 { 48, 80, 40, 72, 54, 86, 46, 78, },
46 { 112, 16, 104, 8, 118, 22, 110, 14, },
50 64, 64, 64, 64, 64, 64, 64, 64
57 uint8_t *ptr = plane + stride * y;
58 for (i = 0; i <
height; i++) {
59 memset(ptr, val, width);
66 const int dst_depth,
const int big_endian)
69 uint16_t *dst = (uint16_t *) (plane + stride * y);
70 #define FILL8TO9_OR_10(wfunc) \
71 for (i = 0; i < height; i++) { \
72 for (j = 0; j < width; j++) { \
73 wfunc(&dst[j], (val << (dst_depth - 8)) | \
74 (val >> (16 - dst_depth))); \
88 const int32_t *filterPos,
int filterSize)
93 const uint16_t *src = (
const uint16_t *) _src;
97 for (i = 0; i <
dstW; i++) {
99 int srcPos = filterPos[i];
102 for (j = 0; j < filterSize; j++) {
103 val += src[srcPos + j] * filter[filterSize * i + j];
106 dst[i] =
FFMIN(val >> sh, (1 << 19) - 1);
112 const int32_t *filterPos,
int filterSize)
116 const uint16_t *src = (
const uint16_t *) _src;
119 for (i = 0; i <
dstW; i++) {
121 int srcPos = filterPos[i];
124 for (j = 0; j < filterSize; j++) {
125 val += src[srcPos + j] * filter[filterSize * i + j];
128 dst[i] =
FFMIN(val >> sh, (1 << 15) - 1);
135 const int32_t *filterPos,
int filterSize)
138 for (i = 0; i <
dstW; i++) {
140 int srcPos = filterPos[i];
142 for (j = 0; j < filterSize; j++) {
143 val += ((int)src[srcPos + j]) * filter[filterSize * i + j];
145 dst[i] =
FFMIN(val >> 7, (1 << 15) - 1);
151 const int32_t *filterPos,
int filterSize)
155 for (i = 0; i <
dstW; i++) {
157 int srcPos = filterPos[i];
159 for (j = 0; j < filterSize; j++) {
160 val += ((int)src[srcPos + j]) * filter[filterSize * i + j];
162 dst[i] =
FFMIN(val >> 3, (1 << 19) - 1);
171 for (i = 0; i <
width; i++) {
172 dstU[i] = (
FFMIN(dstU[i], 30775) * 4663 - 9289992) >> 12;
173 dstV[i] = (
FFMIN(dstV[i], 30775) * 4663 - 9289992) >> 12;
180 for (i = 0; i <
width; i++) {
181 dstU[i] = (dstU[i] * 1799 + 4081085) >> 11;
182 dstV[i] = (dstV[i] * 1799 + 4081085) >> 11;
189 for (i = 0; i <
width; i++)
190 dst[i] = (
FFMIN(dst[i], 30189) * 19077 - 39057361) >> 14;
196 for (i = 0; i <
width; i++)
197 dst[i] = (dst[i] * 14071 + 33561947) >> 14;
205 for (i = 0; i <
width; i++) {
206 dstU[i] = (
FFMIN(dstU[i], 30775 << 4) * 4663 - (9289992 << 4)) >> 12;
207 dstV[i] = (
FFMIN(dstV[i], 30775 << 4) * 4663 - (9289992 << 4)) >> 12;
216 for (i = 0; i <
width; i++) {
217 dstU[i] = (dstU[i] * 1799 + (4081085 << 4)) >> 11;
218 dstV[i] = (dstV[i] * 1799 + (4081085 << 4)) >> 11;
226 for (i = 0; i <
width; i++)
227 dst[i] = (
FFMIN(dst[i], 30189 << 4) * 4769 - (39057361 << 2)) >> 12;
234 for (i = 0; i <
width; i++)
235 dst[i] = (dst[i] * 14071 + (33561947 << 4)) >> 14;
242 unsigned int xpos = 0;
243 for (i = 0; i < dstWidth; i++) {
244 register unsigned int xx = xpos >> 16;
245 register unsigned int xalpha = (xpos & 0xFFFF) >> 9;
246 dst[i] = (src[xx] << 7) + (src[xx + 1] - src[xx]) * xalpha;
259 uint32_t *pal,
int isAlpha)
264 const uint8_t *src = src_in[isAlpha ? 3 : 0];
267 toYV12(formatConvBuffer, src, srcW, pal);
275 c->
hyScale(c, dst, dstWidth, src, hLumFilter,
276 hLumFilterPos, hLumFilterSize);
282 convertRange(dst, dstWidth);
286 int dstWidth,
const uint8_t *src1,
290 unsigned int xpos = 0;
291 for (i = 0; i < dstWidth; i++) {
292 register unsigned int xx = xpos >> 16;
293 register unsigned int xalpha = (xpos & 0xFFFF) >> 9;
294 dst1[i] = (src1[xx] * (xalpha ^ 127) + src1[xx + 1] * xalpha);
295 dst2[i] = (src2[xx] * (xalpha ^ 127) + src2[xx + 1] * xalpha);
301 int16_t *dst2,
int dstWidth,
309 const uint8_t *src1 = src_in[1], *src2 = src_in[2];
311 uint8_t *buf2 = formatConvBuffer +
313 c->
chrToYV12(formatConvBuffer, buf2, src1, src2, srcW, pal);
317 uint8_t *buf2 = formatConvBuffer +
325 c->
hcScale(c, dst1, dstWidth, src1, hChrFilter, hChrFilterPos, hChrFilterSize);
326 c->
hcScale(c, dst2, dstWidth, src2, hChrFilter, hChrFilterPos, hChrFilterSize);
328 c->
hcscale_fast(c, dst1, dst2, dstWidth, src1, src2, srcW, xInc);
/*
 * Compile-time switch for the DEBUG_BUFFERS() tracing macro below.
 * It is defined as 0 here, which makes the macro's condition constant-false.
 */
335 #define DEBUG_SWSCALE_BUFFERS 0
/*
 * DEBUG_BUFFERS(fmt, ...): when DEBUG_SWSCALE_BUFFERS is non-zero, forward
 * the arguments to av_log() at AV_LOG_DEBUG level.
 *
 * - The first av_log() argument is the identifier `c`, which is not a macro
 *   parameter: a variable named `c` must be in scope wherever this macro is
 *   expanded.
 * - The expansion carries no trailing semicolon; the caller supplies it.
 * - NOTE(review): the expansion is a bare, unbraced `if` statement. An
 *   `else` written directly after an invocation would bind to this hidden
 *   `if`. Use it only as a standalone statement, or wrap the body in
 *   do { ... } while (0) if that ever becomes a concern.
 */
336 #define DEBUG_BUFFERS(...) \
337 if (DEBUG_SWSCALE_BUFFERS) \
338 av_log(c, AV_LOG_DEBUG, __VA_ARGS__)
341 int srcStride[],
int srcSliceY,
342 int srcSliceH,
uint8_t *dst[],
int dstStride[])
405 srcStride[3] = srcStride[0];
410 DEBUG_BUFFERS(
"swscale() %p[%d] %p[%d] %p[%d] %p[%d] -> %p[%d] %p[%d] %p[%d] %p[%d]\n",
411 src[0], srcStride[0], src[1], srcStride[1],
412 src[2], srcStride[2], src[3], srcStride[3],
413 dst[0], dstStride[0], dst[1], dstStride[1],
414 dst[2], dstStride[2], dst[3], dstStride[3]);
415 DEBUG_BUFFERS(
"srcSliceY: %d srcSliceH: %d dstY: %d dstH: %d\n",
416 srcSliceY, srcSliceH, dstY, dstH);
417 DEBUG_BUFFERS(
"vLumFilterSize: %d vLumBufSize: %d vChrFilterSize: %d vChrBufSize: %d\n",
418 vLumFilterSize, vLumBufSize, vChrFilterSize, vChrBufSize);
420 if (dstStride[0] % 8 != 0 || dstStride[1] % 8 != 0 ||
421 dstStride[2] % 8 != 0 || dstStride[3] % 8 != 0) {
422 static int warnedAlready = 0;
425 "Warning: dstStride is not aligned!\n"
426 " ->cannot do aligned memory accesses anymore\n");
434 if (srcSliceY == 0) {
442 if (!should_dither) {
447 for (; dstY <
dstH; dstY++) {
450 dst[0] + dstStride[0] *
dstY,
451 dst[1] + dstStride[1] * chrDstY,
452 dst[2] + dstStride[2] * chrDstY,
457 const int firstLumSrcY =
FFMAX(1 - vLumFilterSize, vLumFilterPos[dstY]);
460 const int firstChrSrcY =
FFMAX(1 - vChrFilterSize, vChrFilterPos[chrDstY]);
463 int lastLumSrcY =
FFMIN(c->
srcH, firstLumSrcY + vLumFilterSize) - 1;
464 int lastLumSrcY2 =
FFMIN(c->
srcH, firstLumSrcY2 + vLumFilterSize) - 1;
465 int lastChrSrcY =
FFMIN(c->
chrSrcH, firstChrSrcY + vChrFilterSize) - 1;
469 if (firstLumSrcY > lastInLumBuf)
470 lastInLumBuf = firstLumSrcY - 1;
471 if (firstChrSrcY > lastInChrBuf)
472 lastInChrBuf = firstChrSrcY - 1;
473 assert(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1);
474 assert(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1);
477 DEBUG_BUFFERS(
"\tfirstLumSrcY: %d lastLumSrcY: %d lastInLumBuf: %d\n",
478 firstLumSrcY, lastLumSrcY, lastInLumBuf);
479 DEBUG_BUFFERS(
"\tfirstChrSrcY: %d lastChrSrcY: %d lastInChrBuf: %d\n",
480 firstChrSrcY, lastChrSrcY, lastInChrBuf);
483 enough_lines = lastLumSrcY2 < srcSliceY + srcSliceH &&
487 lastLumSrcY = srcSliceY + srcSliceH - 1;
488 lastChrSrcY = chrSrcSliceY + chrSrcSliceH - 1;
489 DEBUG_BUFFERS(
"buffering slice: lastLumSrcY %d lastChrSrcY %d\n",
490 lastLumSrcY, lastChrSrcY);
494 while (lastInLumBuf < lastLumSrcY) {
496 src[0] + (lastInLumBuf + 1 - srcSliceY) * srcStride[0],
497 src[1] + (lastInLumBuf + 1 - srcSliceY) * srcStride[1],
498 src[2] + (lastInLumBuf + 1 - srcSliceY) * srcStride[2],
499 src[3] + (lastInLumBuf + 1 - srcSliceY) * srcStride[3],
502 assert(lumBufIndex < 2 * vLumBufSize);
503 assert(lastInLumBuf + 1 - srcSliceY < srcSliceH);
504 assert(lastInLumBuf + 1 - srcSliceY >= 0);
505 hyscale(c, lumPixBuf[lumBufIndex], dstW, src1, srcW, lumXInc,
506 hLumFilter, hLumFilterPos, hLumFilterSize,
507 formatConvBuffer, pal, 0);
509 hyscale(c, alpPixBuf[lumBufIndex], dstW, src1, srcW,
510 lumXInc, hLumFilter, hLumFilterPos, hLumFilterSize,
511 formatConvBuffer, pal, 1);
514 lumBufIndex, lastInLumBuf);
516 while (lastInChrBuf < lastChrSrcY) {
518 src[0] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[0],
519 src[1] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[1],
520 src[2] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[2],
521 src[3] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[3],
524 assert(chrBufIndex < 2 * vChrBufSize);
525 assert(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH));
526 assert(lastInChrBuf + 1 - chrSrcSliceY >= 0);
530 hcscale(c, chrUPixBuf[chrBufIndex], chrVPixBuf[chrBufIndex],
531 chrDstW, src1, chrSrcW, chrXInc,
532 hChrFilter, hChrFilterPos, hChrFilterSize,
533 formatConvBuffer, pal);
536 chrBufIndex, lastInChrBuf);
539 if (lumBufIndex >= vLumBufSize)
541 if (chrBufIndex >= vChrBufSize)
548 lastInLumBuf, lastInChrBuf);
554 if (dstY >= dstH - 2) {
558 &yuv2packed1, &yuv2packed2, &yuv2packedX, &yuv2anyX);
562 const int16_t **lumSrcPtr = (
const int16_t **)lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
563 const int16_t **chrUSrcPtr = (
const int16_t **)chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
564 const int16_t **chrVSrcPtr = (
const int16_t **)chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
566 (
const int16_t **)alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize :
NULL;
568 if (firstLumSrcY < 0 || firstLumSrcY + vLumFilterSize > c->
srcH) {
569 const int16_t **tmpY = (
const int16_t **)lumPixBuf +
571 int neg = -firstLumSrcY, i;
572 int end =
FFMIN(c->
srcH - firstLumSrcY, vLumFilterSize);
573 for (i = 0; i < neg; i++)
574 tmpY[i] = lumSrcPtr[neg];
576 tmpY[i] = lumSrcPtr[i];
578 tmpY[i] = tmpY[i - 1];
582 const int16_t **tmpA = (
const int16_t **)alpPixBuf +
584 for (i = 0; i < neg; i++)
585 tmpA[i] = alpSrcPtr[neg];
587 tmpA[i] = alpSrcPtr[i];
589 tmpA[i] = tmpA[i - 1];
593 if (firstChrSrcY < 0 ||
594 firstChrSrcY + vChrFilterSize > c->
chrSrcH) {
595 const int16_t **tmpU = (
const int16_t **)chrUPixBuf + 2 * vChrBufSize,
596 **tmpV = (
const int16_t **)chrVPixBuf + 2 *
vChrBufSize;
597 int neg = -firstChrSrcY, i;
598 int end =
FFMIN(c->
chrSrcH - firstChrSrcY, vChrFilterSize);
599 for (i = 0; i < neg; i++) {
600 tmpU[i] = chrUSrcPtr[neg];
601 tmpV[i] = chrVSrcPtr[neg];
603 for (; i < end; i++) {
604 tmpU[i] = chrUSrcPtr[i];
605 tmpV[i] = chrVSrcPtr[i];
608 tmpU[i] = tmpU[i - 1];
609 tmpV[i] = tmpV[i - 1];
619 if (vLumFilterSize == 1) {
622 yuv2planeX(vLumFilter + dstY * vLumFilterSize,
623 vLumFilterSize, lumSrcPtr, dest[0],
627 if (!((dstY & chrSkipMask) ||
isGray(dstFormat))) {
629 yuv2nv12cX(c, vChrFilter + chrDstY * vChrFilterSize,
630 vChrFilterSize, chrUSrcPtr, chrVSrcPtr,
632 }
else if (vChrFilterSize == 1) {
636 yuv2planeX(vChrFilter + chrDstY * vChrFilterSize,
637 vChrFilterSize, chrUSrcPtr, dest[1],
639 yuv2planeX(vChrFilter + chrDstY * vChrFilterSize,
640 vChrFilterSize, chrVSrcPtr, dest[2],
646 if (vLumFilterSize == 1) {
650 yuv2planeX(vLumFilter + dstY * vLumFilterSize,
651 vLumFilterSize, alpSrcPtr, dest[3],
655 }
else if (yuv2packedX) {
657 vChrFilterSize <= 2) {
658 int chrAlpha = vChrFilterSize == 1 ? 0 : vChrFilter[2 * dstY + 1];
660 alpPixBuf ? *alpSrcPtr : NULL,
661 dest[0], dstW, chrAlpha, dstY);
662 }
else if (c->
yuv2packed2 && vLumFilterSize == 2 &&
663 vChrFilterSize == 2) {
664 int lumAlpha = vLumFilter[2 * dstY + 1];
665 int chrAlpha = vChrFilter[2 * dstY + 1];
667 lumMmxFilter[3] = vLumFilter[2 *
dstY] * 0x10001;
669 chrMmxFilter[3] = vChrFilter[2 * chrDstY] * 0x10001;
671 alpPixBuf ? alpSrcPtr : NULL,
672 dest[0], dstW, lumAlpha, chrAlpha, dstY);
675 lumSrcPtr, vLumFilterSize,
676 vChrFilter + dstY * vChrFilterSize,
677 chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
678 alpSrcPtr, dest[0], dstW, dstY);
681 yuv2anyX(c, vLumFilter + dstY * vLumFilterSize,
682 lumSrcPtr, vLumFilterSize,
683 vChrFilter + dstY * vChrFilterSize,
684 chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
685 alpSrcPtr, dest, dstW, dstY);
692 int height = dstY - lastDstY;
702 fillPlane(dst[3], dstStride[3], length, height, lastDstY, 255);
705 #if HAVE_MMXEXT_INLINE
707 __asm__
volatile (
"sfence" :::
"memory");
718 return dstY - lastDstY;