23 #define _SVID_SOURCE // needed for MAP_ANONYMOUS
31 #if defined(MAP_ANON) && !defined(MAP_ANONYMOUS)
32 #define MAP_ANONYMOUS MAP_ANON
36 #define WIN32_LEAN_AND_MEAN
67 #define LICENSE_PREFIX "libswscale license: "
71 #define RET 0xC3 // near return opcode for x86
204 return "Unknown format";
211 return ((d * dist + c) * dist + b) * dist +
a;
214 b + 2.0 * c + 3.0 * d,
216 -b - 3.0 * c - 6.0 * d,
221 int *outFilterSize,
int xInc,
int srcW,
222 int dstW,
int filterAlign,
int one,
225 double param[2],
int is_horizontal)
232 int64_t *filter2 =
NULL;
233 const int64_t fone = 1LL << 54;
241 if (
FFABS(xInc - 0x10000) < 10) {
245 dstW *
sizeof(*filter) * filterSize, fail);
247 for (i = 0; i < dstW; i++) {
248 filter[i * filterSize] = fone;
256 dstW *
sizeof(*filter) * filterSize, fail);
258 xDstInSrc = xInc / 2 - 0x8000;
259 for (i = 0; i < dstW; i++) {
260 int xx = (xDstInSrc - ((filterSize - 1) << 15) + (1 << 15)) >> 16;
262 (*filterPos)[i] = xx;
266 }
else if ((xInc <= (1 << 16) && (flags &
SWS_AREA)) ||
272 dstW *
sizeof(*filter) * filterSize, fail);
274 xDstInSrc = xInc / 2 - 0x8000;
275 for (i = 0; i < dstW; i++) {
276 int xx = (xDstInSrc - ((filterSize - 1) << 15) + (1 << 15)) >> 16;
279 (*filterPos)[i] = xx;
281 for (j = 0; j < filterSize; j++) {
282 int64_t coeff = fone -
FFABS((xx << 16) - xDstInSrc) *
286 filter[i * filterSize + j] = coeff;
297 else if (flags &
SWS_X)
317 filterSize = 1 + sizeFactor;
319 filterSize = 1 + (sizeFactor * srcW + dstW - 1) / dstW;
321 filterSize =
FFMIN(filterSize, srcW - 2);
322 filterSize =
FFMAX(filterSize, 1);
325 dstW *
sizeof(*filter) * filterSize, fail);
327 xDstInSrc = xInc - 0x10000;
328 for (i = 0; i < dstW; i++) {
329 int xx = (xDstInSrc - ((int64_t)(filterSize - 2) << 16)) / (1 << 17);
331 (*filterPos)[i] = xx;
332 for (j = 0; j < filterSize; j++) {
333 int64_t d = (
FFABS(((int64_t)xx << 17) - xDstInSrc)) << 13;
339 floatd = d * (1.0 / (1 << 30));
341 if (flags & SWS_BICUBIC) {
345 if (d >= 1LL << 31) {
348 int64_t dd = (d * d) >> 30;
349 int64_t ddd = (dd * d) >> 30;
352 coeff = (12 * (1 << 24) - 9 * B - 6 * C) * ddd +
353 (-18 * (1 << 24) + 12 * B + 6 * C) * dd +
354 (6 * (1 << 24) - 2 * B) * (1 << 30);
356 coeff = (-B - 6 * C) * ddd +
357 (6 * B + 30 * C) * dd +
358 (-12 * B - 48 * C) * d +
359 (8 * B + 24 * C) * (1 << 30);
361 coeff *= fone >> (30 + 24);
364 else if (flags & SWS_X) {
365 double p = param ? param * 0.01 : 0.3;
366 coeff = d ? sin(d * M_PI) / (d * M_PI) : 1.0;
367 coeff *= pow(2.0, -p * d * d);
370 else if (flags & SWS_X) {
375 c = cos(floatd * M_PI);
382 coeff = (c * 0.5 + 0.5) * fone;
383 }
else if (flags & SWS_AREA) {
384 int64_t d2 = d - (1 << 29);
385 if (d2 * xInc < -(1LL << (29 + 16)))
386 coeff = 1.0 * (1LL << (30 + 16));
387 else if (d2 * xInc < (1LL << (29 + 16)))
388 coeff = -d2 * xInc + (1LL << (29 + 16));
391 coeff *= fone >> (30 + 16);
392 }
else if (flags & SWS_GAUSS) {
394 coeff = (pow(2.0, -p * floatd * floatd)) * fone;
395 }
else if (flags & SWS_SINC) {
396 coeff = (d ? sin(floatd * M_PI) / (floatd * M_PI) : 1.0) * fone;
397 }
else if (flags & SWS_LANCZOS) {
399 coeff = (d ? sin(floatd * M_PI) * sin(floatd * M_PI / p) /
400 (floatd * floatd * M_PI * M_PI / p) : 1.0) * fone;
403 }
else if (flags & SWS_BILINEAR) {
404 coeff = (1 << 30) - d;
408 }
else if (flags & SWS_SPLINE) {
409 double p = -2.196152422706632;
416 filter[i * filterSize + j] = coeff;
419 xDstInSrc += 2 * xInc;
426 assert(filterSize > 0);
427 filter2Size = filterSize;
429 filter2Size += srcFilter->
length - 1;
431 filter2Size += dstFilter->
length - 1;
432 assert(filter2Size > 0);
435 for (i = 0; i < dstW; i++) {
439 for (k = 0; k < srcFilter->
length; k++) {
440 for (j = 0; j < filterSize; j++)
441 filter2[i * filter2Size + k + j] +=
442 srcFilter->
coeff[k] * filter[i * filterSize + j];
445 for (j = 0; j < filterSize; j++)
446 filter2[i * filter2Size + j] = filter[i * filterSize + j];
450 (*filterPos)[i] += (filterSize - 1) / 2 - (filter2Size - 1) / 2;
457 for (i = dstW - 1; i >= 0; i--) {
458 int min = filter2Size;
460 int64_t cutOff = 0.0;
463 for (j = 0; j < filter2Size; j++) {
465 cutOff +=
FFABS(filter2[i * filter2Size]);
472 if (i < dstW - 1 && (*filterPos)[i] >= (*filterPos)[i + 1])
476 for (k = 1; k < filter2Size; k++)
477 filter2[i * filter2Size + k - 1] = filter2[i * filter2Size + k];
478 filter2[i * filter2Size + k - 1] = 0;
484 for (j = filter2Size - 1; j > 0; j--) {
485 cutOff +=
FFABS(filter2[i * filter2Size + j]);
492 if (min > minFilterSize)
498 if (minFilterSize < 5)
504 if (minFilterSize < 3)
510 if (minFilterSize == 1 && filterAlign == 2)
514 assert(minFilterSize > 0);
515 filterSize = (minFilterSize + (filterAlign - 1)) & (~(filterAlign - 1));
516 assert(filterSize > 0);
517 filter =
av_malloc(filterSize * dstW *
sizeof(*filter));
521 *outFilterSize = filterSize;
525 "SwScaler: reducing / aligning filtersize %d -> %d\n",
526 filter2Size, filterSize);
528 for (i = 0; i < dstW; i++) {
531 for (j = 0; j < filterSize; j++) {
532 if (j >= filter2Size)
533 filter[i * filterSize + j] = 0;
535 filter[i * filterSize + j] = filter2[i * filter2Size + j];
537 filter[i * filterSize + j] = 0;
545 for (i = 0; i < dstW; i++) {
547 if ((*filterPos)[i] < 0) {
549 for (j = 1; j < filterSize; j++) {
550 int left =
FFMAX(j + (*filterPos)[i], 0);
551 filter[i * filterSize + left] += filter[i * filterSize + j];
552 filter[i * filterSize + j] = 0;
557 if ((*filterPos)[i] + filterSize > srcW) {
558 int shift = (*filterPos)[i] + filterSize - srcW;
560 for (j = filterSize - 2; j >= 0; j--) {
561 int right =
FFMIN(j + shift, filterSize - 1);
562 filter[i * filterSize + right] += filter[i * filterSize + j];
563 filter[i * filterSize + j] = 0;
565 (*filterPos)[i] = srcW - filterSize;
573 *outFilterSize * (dstW + 3) *
sizeof(int16_t), fail);
576 for (i = 0; i < dstW; i++) {
581 for (j = 0; j < filterSize; j++) {
582 sum += filter[i * filterSize + j];
584 sum = (sum + one / 2) / one;
585 for (j = 0; j < *outFilterSize; j++) {
586 int64_t v = filter[i * filterSize + j] + error;
588 (*outFilter)[i * (*outFilterSize) + j] = intV;
589 error = v - intV * sum;
593 (*filterPos)[dstW + 0] =
594 (*filterPos)[dstW + 1] =
595 (*filterPos)[dstW + 2] = (*filterPos)[dstW - 1];
597 for (i = 0; i < *outFilterSize; i++) {
598 int k = (dstW - 1) * (*outFilterSize) + i;
599 (*outFilter)[k + 1 * (*outFilterSize)] =
600 (*outFilter)[k + 2 * (*outFilterSize)] =
601 (*outFilter)[k + 3 * (*outFilterSize)] = (*outFilter)[k];
612 #if HAVE_MMXEXT_INLINE
613 static av_cold int init_hscaler_mmxext(
int dstW,
int xInc,
uint8_t *filterCode,
642 "movq (%%"REG_d
", %%"REG_a
"), %%mm3 \n\t"
643 "movd (%%"REG_c
", %%"REG_S
"), %%mm0 \n\t"
644 "movd 1(%%"REG_c
", %%"REG_S
"), %%mm1 \n\t"
645 "punpcklbw %%mm7, %%mm1 \n\t"
646 "punpcklbw %%mm7, %%mm0 \n\t"
647 "pshufw $0xFF, %%mm1, %%mm1 \n\t"
649 "pshufw $0xFF, %%mm0, %%mm0 \n\t"
651 "psubw %%mm1, %%mm0 \n\t"
652 "movl 8(%%"REG_b
", %%"REG_a
"), %%esi \n\t"
653 "pmullw %%mm3, %%mm0 \n\t"
654 "psllw $7, %%mm1 \n\t"
655 "paddw %%mm1, %%mm0 \n\t"
657 "movq %%mm0, (%%"REG_D
", %%"REG_a
") \n\t"
659 "add $8, %%"REG_a
" \n\t"
674 :
"=r" (fragmentA),
"=r" (imm8OfPShufW1A),
"=r" (imm8OfPShufW2A),
675 "=r" (fragmentLengthA)
682 "movq (%%"REG_d
", %%"REG_a
"), %%mm3 \n\t"
683 "movd (%%"REG_c
", %%"REG_S
"), %%mm0 \n\t"
684 "punpcklbw %%mm7, %%mm0 \n\t"
685 "pshufw $0xFF, %%mm0, %%mm1 \n\t"
687 "pshufw $0xFF, %%mm0, %%mm0 \n\t"
689 "psubw %%mm1, %%mm0 \n\t"
690 "movl 8(%%"REG_b
", %%"REG_a
"), %%esi \n\t"
691 "pmullw %%mm3, %%mm0 \n\t"
692 "psllw $7, %%mm1 \n\t"
693 "paddw %%mm1, %%mm0 \n\t"
695 "movq %%mm0, (%%"REG_D
", %%"REG_a
") \n\t"
697 "add $8, %%"REG_a
" \n\t"
712 :
"=r" (fragmentB),
"=r" (imm8OfPShufW1B),
"=r" (imm8OfPShufW2B),
713 "=r" (fragmentLengthB)
719 for (i = 0; i < dstW / numSplits; i++) {
724 int b = ((xpos + xInc) >> 16) - xx;
725 int c = ((xpos + xInc * 2) >> 16) - xx;
726 int d = ((xpos + xInc * 3) >> 16) - xx;
727 int inc = (d + 1 < 4);
728 uint8_t *fragment = (d + 1 < 4) ? fragmentB : fragmentA;
729 x86_reg imm8OfPShufW1 = (d + 1 < 4) ? imm8OfPShufW1B : imm8OfPShufW1A;
730 x86_reg imm8OfPShufW2 = (d + 1 < 4) ? imm8OfPShufW2B : imm8OfPShufW2A;
731 x86_reg fragmentLength = (d + 1 < 4) ? fragmentLengthB : fragmentLengthA;
732 int maxShift = 3 - (d + inc);
736 filter[i] = ((xpos & 0xFFFF) ^ 0xFFFF) >> 9;
737 filter[i + 1] = (((xpos + xInc) & 0xFFFF) ^ 0xFFFF) >> 9;
738 filter[i + 2] = (((xpos + xInc * 2) & 0xFFFF) ^ 0xFFFF) >> 9;
739 filter[i + 3] = (((xpos + xInc * 3) & 0xFFFF) ^ 0xFFFF) >> 9;
740 filterPos[i / 2] = xx;
742 memcpy(filterCode + fragmentPos, fragment, fragmentLength);
744 filterCode[fragmentPos + imm8OfPShufW1] = (a + inc) |
748 filterCode[fragmentPos + imm8OfPShufW2] = a | (b << 2) |
752 if (i + 4 - inc >= dstW)
754 else if ((filterPos[i / 2] & 3) <= maxShift)
755 shift = filterPos[i / 2] & 3;
757 if (shift && i >= shift) {
758 filterCode[fragmentPos + imm8OfPShufW1] += 0x55 * shift;
759 filterCode[fragmentPos + imm8OfPShufW2] += 0x55 * shift;
760 filterPos[i / 2] -= shift;
764 fragmentPos += fragmentLength;
767 filterCode[fragmentPos] =
RET;
772 filterPos[((i / 2) + 1) & (~1)] = xpos >> 16;
774 return fragmentPos + 1;
786 int srcRange,
const int table[4],
int dstRange,
787 int brightness,
int contrast,
int saturation)
806 contrast, saturation);
811 contrast, saturation);
816 int *srcRange,
int **table,
int *dstRange,
817 int *brightness,
int *contrast,
int *saturation)
869 int usesVFilter, usesHFilter;
876 int dst_stride =
FFALIGN(dstW *
sizeof(int16_t) + 16, 16);
877 int dst_stride_px = dst_stride >> 1;
890 unscaled = (srcW == dstW && srcH == dstH);
920 if (dstW < srcW && dstH < srcH)
922 else if (dstW > srcW && dstH > srcH)
927 }
else if (i & (i - 1)) {
929 "Exactly one scaler algorithm must be chosen\n");
933 if (srcW < 4 || srcH < 1 || dstW < 8 || dstH < 1) {
937 srcW, srcH, dstW, dstH);
942 dstFilter = &dummyFilter;
944 srcFilter = &dummyFilter;
946 c->
lumXInc = (((int64_t)srcW << 16) + (dstW >> 1)) / dstW;
947 c->
lumYInc = (((int64_t)srcH << 16) + (dstH >> 1)) / dstH;
950 c->
vRounder = 4 * 0x0001000100010001ULL;
967 "%s output is not supported with half chroma resolution, switching to full\n",
986 "full chroma interpolation for destination format '%s' not yet implemented\n",
1019 if (unscaled && !usesHFilter && !usesVFilter &&
1026 "using unscaled %s -> %s special converter\n",
1045 (srcW & 15) == 0) ? 1 : 0;
1050 "output width is not a multiple of 32 -> no MMXEXT scaler\n");
1067 if (flags & SWS_FAST_BILINEAR) {
1074 c->
lumXInc = ((int64_t)(srcW - 2) << 16) / (dstW - 2) - 20;
1079 #define USE_MMAP (HAVE_MMAP && HAVE_MPROTECT && defined MAP_ANONYMOUS)
1083 #if HAVE_MMXEXT_INLINE
1093 PROT_READ | PROT_WRITE,
1094 MAP_PRIVATE | MAP_ANONYMOUS,
1097 PROT_READ | PROT_WRITE,
1098 MAP_PRIVATE | MAP_ANONYMOUS,
1100 #elif HAVE_VIRTUALALLOC
1104 PAGE_EXECUTE_READWRITE);
1108 PAGE_EXECUTE_READWRITE);
1133 const int filterAlign =
X86_MMX(cpu_flags) ? 4 :
1138 srcW, dstW, filterAlign, 1 << 14,
1140 cpu_flags, srcFilter->
lumH, dstFilter->
lumH,
1146 (flags & SWS_BICUBLIN) ? (flags |
SWS_BILINEAR) : flags,
1147 cpu_flags, srcFilter->
chrH, dstFilter->
chrH,
1155 const int filterAlign =
X86_MMX(cpu_flags) ? 2 :
1159 c->
lumYInc, srcH, dstH, filterAlign, (1 << 12),
1161 cpu_flags, srcFilter->
lumV, dstFilter->
lumV,
1166 filterAlign, (1 << 12),
1167 (flags & SWS_BICUBLIN) ? (flags |
SWS_BILINEAR) : flags,
1168 cpu_flags, srcFilter->
chrV, dstFilter->
chrV,
1178 short *p = (
short *)&c->vYCoeffsBank[i];
1179 for (j = 0; j < 8; j++)
1185 short *p = (
short *)&c->vCCoeffsBank[i];
1186 for (j = 0; j < 8; j++)
1195 for (i = 0; i < dstH; i++) {
1196 int chrI = (int64_t)i * c->
chrDstH / dstH;
1223 dst_stride + 16, fail);
1231 dst_stride * 2 + 32, fail);
1239 dst_stride + 16, fail);
1245 memset(c->
chrUPixBuf[i], 64, dst_stride * 2 + 1);
1250 if (flags & SWS_FAST_BILINEAR)
1256 else if (flags &
SWS_X)
1300 "lum srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n",
1303 "chr srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n",
1314 #if FF_API_SWS_GETCONTEXT
1318 SwsFilter *dstFilter,
const double *param)
1336 c->
param[0] = param[0];
1337 c->
param[1] = param[1];
1353 float lumaSharpen,
float chromaSharpen,
1354 float chromaHShift,
float chromaVShift,
1361 if (lumaGBlur != 0.0) {
1369 if (chromaGBlur != 0.0) {
1377 if (chromaSharpen != 0.0) {
1386 if (lumaSharpen != 0.0) {
1395 if (chromaHShift != 0.0)
1398 if (chromaVShift != 0.0)
1428 const int length = (int)(variance * quality + 0.5) | 1;
1430 double middle = (length - 1) * 0.5;
1436 for (i = 0; i < length; i++) {
1437 double dist = i - middle;
1438 vec->
coeff[i] = exp(-dist * dist / (2 * variance * variance)) /
1439 sqrt(2 * variance * M_PI);
1455 for (i = 0; i < length; i++)
1471 for (i = 0; i < a->
length; i++)
1481 for (i = 0; i < a->
length; i++)
1482 a->
coeff[i] *= scalar;
1499 for (i = 0; i < a->
length; i++) {
1500 for (j = 0; j < b->
length; j++) {
1517 for (i = 0; i < a->
length; i++)
1534 for (i = 0; i < a->
length; i++)
1552 for (i = 0; i < a->
length; i++) {
1553 vec->
coeff[i + (length - 1) / 2 -
1604 for (i = 0; i < a->
length; i++)
1617 for (i = 0; i < a->
length; i++)
1618 if (a->
coeff[i] > max)
1621 for (i = 0; i < a->
length; i++)
1622 if (a->
coeff[i] < min)
1627 for (i = 0; i < a->
length; i++) {
1628 int x = (int)((a->
coeff[i] - min) * 60.0 / range + 0.5);
1629 av_log(log_ctx, log_level,
"%1.3f ", a->
coeff[i]);
1631 av_log(log_ctx, log_level,
" ");
1632 av_log(log_ctx, log_level,
"|\n");
1706 #elif HAVE_VIRTUALALLOC
1731 const double *
param)
1737 param = default_param;
1740 (context->
srcW != srcW ||
1741 context->
srcH != srcH ||
1743 context->
dstW != dstW ||
1744 context->
dstH != dstH ||
1746 context->
flags != flags ||
1747 context->
param[0] != param[0] ||
1748 context->
param[1] != param[1])) {
1765 context->
param[0] = param[0];
1766 context->
param[1] = param[1];
1770 context->
dstRange, 0, 1 << 16, 1 << 16);