/*
 * NOTE(review): this chunk is a mangled extraction of SDL's audio type
 * converter source.  The leading number on each line is a residual source
 * line number from the extraction (gaps mean elided lines, including the
 * matching #endif directives).  Code tokens are untouched; only comments
 * have been added.
 */
22 #include "../SDL_internal.h"
/* SIMD backend availability flags (the surrounding detection #ifs are elided). */
30 #define HAVE_NEON_INTRINSICS 1
34 #define HAVE_SSE2_INTRINSICS 1
/* Platforms whose baseline ABI guarantees the SIMD unit can skip the
 * scalar fallback converters entirely. */
37 #if defined(__x86_64__) && HAVE_SSE2_INTRINSICS
38 #define NEED_SCALAR_CONVERTER_FALLBACKS 0
39 #elif __MACOSX__ && HAVE_SSE2_INTRINSICS
40 #define NEED_SCALAR_CONVERTER_FALLBACKS 0
41 #elif defined(__ARM_ARCH) && (__ARM_ARCH >= 8) && HAVE_NEON_INTRINSICS
42 #define NEED_SCALAR_CONVERTER_FALLBACKS 0
43 #elif defined(__APPLE__) && defined(__ARM_ARCH) && (__ARM_ARCH >= 7) && HAVE_NEON_INTRINSICS
44 #define NEED_SCALAR_CONVERTER_FALLBACKS 0
/* Anything else: build the scalar converters as a fallback. */
48 #ifndef NEED_SCALAR_CONVERTER_FALLBACKS
49 #define NEED_SCALAR_CONVERTER_FALLBACKS 1
/* Reciprocals of the integer sample ranges: 1/128, 1/32768, 1/8388607.
 * Multiplying by these maps the integer formats onto roughly [-1.0, 1.0]. */
65 #define DIVBY128 0.0078125f
66 #define DIVBY32768 0.000030517578125f
67 #define DIVBY8388607 0.00000011920930376163766f
70 #if NEED_SCALAR_CONVERTER_FALLBACKS
/* --- scalar fallback converter fragments ---
 * NOTE(review): interior lines are missing; what remains appears to be the
 * per-sample clamp heads of the F32 -> integer scalar converters, plus a
 * dst setup from an integer -> F32 converter.  TODO: confirm against the
 * full file.  Code tokens are untouched.
 */
151 float *
dst = (
float *) cvt->
buf;
/* F32 -> S8 scalar fragment: clamp each sample to [-1.0, 1.0] before scaling. */
168 const float *
src = (
const float *) cvt->
buf;
175 const float sample = *
src;
176 if (sample >= 1.0
f) {
178 }
else if (sample <= -1.0
f) {
/* F32 -> U8 scalar fragment: (sample + 1) * 127 maps [-1, 1] to [0, 254]. */
194 const float *
src = (
const float *) cvt->
buf;
201 const float sample = *
src;
202 if (sample >= 1.0
f) {
204 }
else if (sample <= -1.0
f) {
207 *
dst = (
Uint8)((sample + 1.0
f) * 127.0f);
/* F32 -> S16 scalar fragment (clamp head only visible). */
220 const float *
src = (
const float *) cvt->
buf;
227 const float sample = *
src;
228 if (sample >= 1.0
f) {
230 }
else if (sample <= -1.0
f) {
/* F32 -> U16 scalar fragment (clamp head only visible). */
246 const float *
src = (
const float *) cvt->
buf;
253 const float sample = *
src;
254 if (sample >= 1.0
f) {
256 }
else if (sample <= -1.0
f) {
/* F32 -> S32 scalar fragment (clamp head only visible). */
272 const float *
src = (
const float *) cvt->
buf;
279 const float sample = *
src;
280 if (sample >= 1.0
f) {
282 }
else if (sample <= -1.0
f) {
296 #if HAVE_SSE2_INTRINSICS
/* S8 -> F32 SSE2 fragment: 16-byte-aligned fast path.  The conversion is
 * done in place and widens 1 byte -> 4 bytes, so it walks BACKWARDS from
 * the end of the buffer (note the decrements at the bottom). */
315 if ((((
size_t)
src) & 15) == 0) {
317 const __m128i *mmsrc = (
const __m128i *)
src;
318 const __m128i
zero = _mm_setzero_si128();
319 const __m128 divby128 = _mm_set1_ps(
DIVBY128);
321 const __m128i bytes = _mm_load_si128(mmsrc);
/* slli+srai sign-extends the even-indexed Sint8 of each 16-bit lane... */
323 const __m128i shorts1 = _mm_srai_epi16(_mm_slli_epi16(bytes, 8), 8);
/* ...srai alone sign-extends the odd-indexed one. */
325 const __m128i shorts2 = _mm_srai_epi16(bytes, 8);
/* widen 16 -> 32 bits with sign extension, convert to float, scale by 1/128. */
327 const __m128 floats1 = _mm_mul_ps(_mm_cvtepi32_ps(_mm_srai_epi32(_mm_slli_epi32(_mm_unpacklo_epi16(shorts1,
zero), 16), 16)), divby128);
328 const __m128 floats2 = _mm_mul_ps(_mm_cvtepi32_ps(_mm_srai_epi32(_mm_slli_epi32(_mm_unpacklo_epi16(shorts2,
zero), 16), 16)), divby128);
329 const __m128 floats3 = _mm_mul_ps(_mm_cvtepi32_ps(_mm_srai_epi32(_mm_slli_epi32(_mm_unpackhi_epi16(shorts1,
zero), 16), 16)), divby128);
330 const __m128 floats4 = _mm_mul_ps(_mm_cvtepi32_ps(_mm_srai_epi32(_mm_slli_epi32(_mm_unpackhi_epi16(shorts2,
zero), 16), 16)), divby128);
/* interleave even/odd results back into original sample order and store. */
332 _mm_store_ps(
dst, _mm_unpacklo_ps(floats1, floats2));
333 _mm_store_ps(
dst+4, _mm_unpackhi_ps(floats1, floats2));
334 _mm_store_ps(
dst+8, _mm_unpacklo_ps(floats3, floats4));
335 _mm_store_ps(
dst+12, _mm_unpackhi_ps(floats3, floats4));
336 i -= 16; mmsrc--;
dst -= 16;
/* U8 -> F32 SSE2 fragment: same backwards aligned path; unsigned bytes are
 * zero-extended (srli) then mapped via u * (1/128) + (-1.0). */
374 if ((((
size_t)
src) & 15) == 0) {
376 const __m128i *mmsrc = (
const __m128i *)
src;
377 const __m128i
zero = _mm_setzero_si128();
378 const __m128 divby128 = _mm_set1_ps(
DIVBY128);
379 const __m128 minus1 = _mm_set1_ps(-1.0
f);
381 const __m128i bytes = _mm_load_si128(mmsrc);
383 const __m128i shorts1 = _mm_srli_epi16(_mm_slli_epi16(bytes, 8), 8);
385 const __m128i shorts2 = _mm_srli_epi16(bytes, 8);
388 const __m128 floats1 = _mm_add_ps(_mm_mul_ps(_mm_cvtepi32_ps(_mm_unpacklo_epi16(shorts1,
zero)), divby128), minus1);
389 const __m128 floats2 = _mm_add_ps(_mm_mul_ps(_mm_cvtepi32_ps(_mm_unpacklo_epi16(shorts2,
zero)), divby128), minus1);
390 const __m128 floats3 = _mm_add_ps(_mm_mul_ps(_mm_cvtepi32_ps(_mm_unpackhi_epi16(shorts1,
zero)), divby128), minus1);
391 const __m128 floats4 = _mm_add_ps(_mm_mul_ps(_mm_cvtepi32_ps(_mm_unpackhi_epi16(shorts2,
zero)), divby128), minus1);
393 _mm_store_ps(
dst, _mm_unpacklo_ps(floats1, floats2));
394 _mm_store_ps(
dst+4, _mm_unpackhi_ps(floats1, floats2));
395 _mm_store_ps(
dst+8, _mm_unpacklo_ps(floats3, floats4));
396 _mm_store_ps(
dst+12, _mm_unpackhi_ps(floats3, floats4));
397 i -= 16; mmsrc--;
dst -= 16;
/* S16 -> F32 SSE2 fragment: aligned path, 8 samples per iteration;
 * slli+srai isolates/sign-extends even lanes, srai the odd lanes. */
435 if ((((
size_t)
src) & 15) == 0) {
437 const __m128 divby32768 = _mm_set1_ps(
DIVBY32768);
439 const __m128i ints = _mm_load_si128((__m128i
const *)
src);
441 const __m128i
a = _mm_srai_epi32(_mm_slli_epi32(ints, 16), 16);
443 const __m128i
b = _mm_srai_epi32(ints, 16);
445 _mm_store_ps(
dst, _mm_mul_ps(_mm_cvtepi32_ps(_mm_unpacklo_epi32(
a,
b)), divby32768));
446 _mm_store_ps(
dst+4, _mm_mul_ps(_mm_cvtepi32_ps(_mm_unpackhi_epi32(
a,
b)), divby32768));
483 if ((((
size_t)
src) & 15) == 0) {
485 const __m128 divby32768 = _mm_set1_ps(
DIVBY32768);
486 const __m128 minus1 = _mm_set1_ps(1.0
f);
488 const __m128i ints = _mm_load_si128((__m128i
const *)
src);
490 const __m128i
a = _mm_srli_epi32(_mm_slli_epi32(ints, 16), 16);
492 const __m128i
b = _mm_srli_epi32(ints, 16);
494 _mm_store_ps(
dst, _mm_add_ps(_mm_mul_ps(_mm_cvtepi32_ps(_mm_unpacklo_epi32(
a,
b)), divby32768), minus1));
495 _mm_store_ps(
dst+4, _mm_add_ps(_mm_mul_ps(_mm_cvtepi32_ps(_mm_unpackhi_epi32(
a,
b)), divby32768), minus1));
/* S32 -> F32 SSE2 fragment.  Sint32 audio keeps its payload in the top
 * 24 bits, so srai by 8 recovers a signed 24-bit value, which is then
 * scaled by 1/8388607.  This conversion shrinks in place, so it walks
 * forwards (increments at the bottom). */
518 float *
dst = (
float *) cvt->
buf;
534 const __m128i *mmsrc = (
const __m128i *)
src;
537 _mm_store_ps(
dst, _mm_mul_ps(_mm_cvtepi32_ps(_mm_srai_epi32(_mm_load_si128(mmsrc), 8)), divby8388607));
538 i -= 4; mmsrc++;
dst += 4;
/* --- SSE2 F32 -> integer converter fragments ---
 * Each converter has the same shape: a scalar head loop that clamps
 * samples until dst is 16-byte aligned, an aligned SIMD loop, then a
 * scalar tail.  Only fragments of each are visible here. */
/* F32 -> S8 SSE2 (scalar clamp head). */
557 const float *
src = (
const float *) cvt->
buf;
565 const float sample = *
src;
566 if (sample >= 1.0
f) {
568 }
else if (sample <= -1.0
f) {
/* F32 -> S8 SSE2 aligned loop: clamp to [-1,1], scale by 127, then
 * saturating-pack 32 -> 16 -> 8 bits (signed). */
578 if ((((
size_t)
src) & 15) == 0) {
580 const __m128
one = _mm_set1_ps(1.0
f);
581 const __m128 negone = _mm_set1_ps(-1.0
f);
582 const __m128 mulby127 = _mm_set1_ps(127.0
f);
583 __m128i *mmdst = (__m128i *)
dst;
585 const __m128i ints1 = _mm_cvtps_epi32(_mm_mul_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(
src)),
one), mulby127));
586 const __m128i ints2 = _mm_cvtps_epi32(_mm_mul_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(
src+4)),
one), mulby127));
587 const __m128i ints3 = _mm_cvtps_epi32(_mm_mul_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(
src+8)),
one), mulby127));
588 const __m128i ints4 = _mm_cvtps_epi32(_mm_mul_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(
src+12)),
one), mulby127));
589 _mm_store_si128(mmdst, _mm_packs_epi16(_mm_packs_epi32(ints1, ints2), _mm_packs_epi32(ints3, ints4)));
590 i -= 16;
src += 16; mmdst++;
/* F32 -> S8 SSE2 (scalar tail clamp). */
597 const float sample = *
src;
598 if (sample >= 1.0
f) {
600 }
else if (sample <= -1.0
f) {
/* F32 -> U8 SSE2 (scalar head): (sample + 1) * 127 maps to [0, 254]. */
617 const float *
src = (
const float *) cvt->
buf;
625 const float sample = *
src;
626 if (sample >= 1.0
f) {
628 }
else if (sample <= -1.0
f) {
631 *
dst = (
Uint8)((sample + 1.0
f) * 127.0f);
/* F32 -> U8 SSE2 aligned loop: add 1 before scaling, then pack with
 * UNSIGNED saturation (packus) at the final 16 -> 8 step. */
638 if ((((
size_t)
src) & 15) == 0) {
640 const __m128
one = _mm_set1_ps(1.0
f);
641 const __m128 negone = _mm_set1_ps(-1.0
f);
642 const __m128 mulby127 = _mm_set1_ps(127.0
f);
643 __m128i *mmdst = (__m128i *)
dst;
645 const __m128i ints1 = _mm_cvtps_epi32(_mm_mul_ps(_mm_add_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(
src)),
one),
one), mulby127));
646 const __m128i ints2 = _mm_cvtps_epi32(_mm_mul_ps(_mm_add_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(
src+4)),
one),
one), mulby127));
647 const __m128i ints3 = _mm_cvtps_epi32(_mm_mul_ps(_mm_add_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(
src+8)),
one),
one), mulby127));
648 const __m128i ints4 = _mm_cvtps_epi32(_mm_mul_ps(_mm_add_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(
src+12)),
one),
one), mulby127));
649 _mm_store_si128(mmdst, _mm_packus_epi16(_mm_packs_epi32(ints1, ints2), _mm_packs_epi32(ints3, ints4)));
650 i -= 16;
src += 16; mmdst++;
/* F32 -> U8 SSE2 (scalar tail). */
657 const float sample = *
src;
658 if (sample >= 1.0
f) {
660 }
else if (sample <= -1.0
f) {
663 *
dst = (
Uint8)((sample + 1.0
f) * 127.0f);
/* F32 -> S16 SSE2 (scalar head). */
677 const float *
src = (
const float *) cvt->
buf;
685 const float sample = *
src;
686 if (sample >= 1.0
f) {
688 }
else if (sample <= -1.0
f) {
/* F32 -> S16 SSE2 aligned loop: scale by 32767, saturating-pack 32 -> 16. */
698 if ((((
size_t)
src) & 15) == 0) {
700 const __m128
one = _mm_set1_ps(1.0
f);
701 const __m128 negone = _mm_set1_ps(-1.0
f);
702 const __m128 mulby32767 = _mm_set1_ps(32767.0
f);
703 __m128i *mmdst = (__m128i *)
dst;
705 const __m128i ints1 = _mm_cvtps_epi32(_mm_mul_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(
src)),
one), mulby32767));
706 const __m128i ints2 = _mm_cvtps_epi32(_mm_mul_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(
src+4)),
one), mulby32767));
707 _mm_store_si128(mmdst, _mm_packs_epi32(ints1, ints2));
708 i -= 8;
src += 8; mmdst++;
/* F32 -> S16 SSE2 (scalar tail). */
715 const float sample = *
src;
716 if (sample >= 1.0
f) {
718 }
else if (sample <= -1.0
f) {
/* F32 -> U16 SSE2 (scalar head). */
735 const float *
src = (
const float *) cvt->
buf;
743 const float sample = *
src;
744 if (sample >= 1.0
f) {
746 }
else if (sample <= -1.0
f) {
/* F32 -> U16 SSE2 aligned loop: compute signed 16-bit samples, then XOR
 * the sign bit (0x8000) to shift into unsigned range -- there is no
 * unsigned 32 -> 16 pack in SSE2. */
756 if ((((
size_t)
src) & 15) == 0) {
765 const __m128 mulby32767 = _mm_set1_ps(32767.0
f);
766 const __m128i topbit = _mm_set1_epi16(-32768);
767 const __m128
one = _mm_set1_ps(1.0
f);
768 const __m128 negone = _mm_set1_ps(-1.0
f);
769 __m128i *mmdst = (__m128i *)
dst;
771 const __m128i ints1 = _mm_cvtps_epi32(_mm_mul_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(
src)),
one), mulby32767));
772 const __m128i ints2 = _mm_cvtps_epi32(_mm_mul_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(
src+4)),
one), mulby32767));
773 _mm_store_si128(mmdst, _mm_xor_si128(_mm_packs_epi32(ints1, ints2), topbit));
774 i -= 8;
src += 8; mmdst++;
/* F32 -> U16 SSE2 (scalar tail). */
781 const float sample = *
src;
782 if (sample >= 1.0
f) {
784 }
else if (sample <= -1.0
f) {
/* F32 -> S32 SSE2 (scalar head). */
801 const float *
src = (
const float *) cvt->
buf;
809 const float sample = *
src;
810 if (sample >= 1.0
f) {
812 }
else if (sample <= -1.0
f) {
/* F32 -> S32 SSE2 aligned loop: scale by 8388607 (24-bit range) and
 * shift left 8 to place the payload in the top 24 bits. */
824 const __m128
one = _mm_set1_ps(1.0
f);
825 const __m128 negone = _mm_set1_ps(-1.0
f);
826 const __m128 mulby8388607 = _mm_set1_ps(8388607.0
f);
827 __m128i *mmdst = (__m128i *)
dst;
829 _mm_store_si128(mmdst, _mm_slli_epi32(_mm_cvtps_epi32(_mm_mul_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(
src)),
one), mulby8388607)), 8));
830 i -= 4;
src += 4; mmdst++;
/* F32 -> S32 SSE2 (scalar tail). */
837 const float sample = *
src;
838 if (sample >= 1.0
f) {
840 }
else if (sample <= -1.0
f) {
855 #if HAVE_NEON_INTRINSICS
/* S8 -> F32 NEON fragment: aligned fast path, walking backwards (in-place
 * widening conversion): f = s8 * (1/128).
 * NOTE(review): the high half of the vector is stored at the LOWER dst
 * address here (dst gets int16hi, dst+12 gets the low half) -- verify
 * against the loop setup in the elided lines that this ordering is
 * intentional for the backwards walk. */
874 if ((((
size_t)
src) & 15) == 0) {
877 const float32x4_t divby128 = vdupq_n_f32(
DIVBY128);
879 const int8x16_t bytes = vld1q_s8(mmsrc);
880 const int16x8_t int16hi = vmovl_s8(vget_high_s8(bytes));
881 const int16x8_t int16lo = vmovl_s8(vget_low_s8(bytes));
883 vst1q_f32(
dst, vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_high_s16(int16hi))), divby128));
884 vst1q_f32(
dst+4, vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_low_s16(int16hi))), divby128));
885 vst1q_f32(
dst+8, vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_high_s16(int16lo))), divby128));
886 vst1q_f32(
dst+12, vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_low_s16(int16lo))), divby128));
887 i -= 16; mmsrc -= 16;
dst -= 16;
/* SDL_Convert_U8_to_F32_NEON fragment: the 16-byte-aligned fast path.
 * Converts 16 Uint8 samples per iteration, walking backwards (in-place
 * widening conversion): f = u * (1/128) - 1.0.
 * (Loop header and surrounding function lines are elided in this chunk.)
 */
if ((((size_t) src) & 15) == 0) {
    const float32x4_t divby128 = vdupq_n_f32(DIVBY128);
    /* BUGFIX(review): the original used `one = 1.0f` with
     * vmlsq_f32(u, divby128, one).  vmlsq_f32(a, b, c) == a - (b * c),
     * so that computed u - 1/128 -- never scaled into [-1, 1] at all.
     * vmlaq_f32(a, b, c) == a + (b * c), so vmlaq_f32(negone, u, divby128)
     * yields the intended u/128 - 1.0 (this matches the scalar and SSE2
     * U8 paths, and the fix that went into upstream SDL). */
    const float32x4_t negone = vdupq_n_f32(-1.0f);
    const uint8x16_t bytes = vld1q_u8(mmsrc);                    /* 16 Uint8s */
    const uint16x8_t uint16hi = vmovl_u8(vget_high_u8(bytes));   /* widen to u16 */
    const uint16x8_t uint16lo = vmovl_u8(vget_low_u8(bytes));
    /* widen to u32, convert to float, scale and bias, store.
     * NOTE(review): store ordering (high half at lower address) preserved
     * from the original backwards-walking code -- verify intent. */
    vst1q_f32(dst, vmlaq_f32(negone, vcvtq_f32_u32(vmovl_u16(vget_high_u16(uint16hi))), divby128));
    vst1q_f32(dst+4, vmlaq_f32(negone, vcvtq_f32_u32(vmovl_u16(vget_low_u16(uint16hi))), divby128));
    vst1q_f32(dst+8, vmlaq_f32(negone, vcvtq_f32_u32(vmovl_u16(vget_high_u16(uint16lo))), divby128));
    vst1q_f32(dst+12, vmlaq_f32(negone, vcvtq_f32_u32(vmovl_u16(vget_low_u16(uint16lo))), divby128));
    i -= 16; mmsrc -= 16; dst -= 16;
/* S16 -> F32 NEON fragment: aligned path, 8 samples per iteration:
 * f = s16 * (1/32768). */
977 if ((((
size_t)
src) & 15) == 0) {
979 const float32x4_t divby32768 = vdupq_n_f32(
DIVBY32768);
981 const int16x8_t ints = vld1q_s16((
int16_t const *)
src);
983 vst1q_f32(
dst, vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_low_s16(ints))), divby32768));
984 vst1q_f32(
dst+4, vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_high_s16(ints))), divby32768));
/* SDL_Convert_U16_to_F32_NEON fragment: the 16-byte-aligned fast path.
 * Converts 8 Uint16 samples per iteration: f = u * (1/32768) - 1.0.
 * (Loop header and surrounding function lines are elided in this chunk.)
 */
if ((((size_t) src) & 15) == 0) {
    const float32x4_t divby32768 = vdupq_n_f32(DIVBY32768);
    /* BUGFIX(review): the original computed vmlsq_f32(one, u, divby32768)
     * == 1.0 - u/32768, which inverts the waveform's polarity (u = 0 maps
     * to +1.0, u = 65535 maps to ~-1.0).  vmlaq_f32(negone, u, divby32768)
     * == u/32768 - 1.0 gives the correct mapping, consistent with the
     * scalar and SSE2 U16 paths. */
    const float32x4_t negone = vdupq_n_f32(-1.0f);
    const uint16x8_t uints = vld1q_u16((uint16_t const *) src);   /* 8 Uint16s */
    vst1q_f32(dst, vmlaq_f32(negone, vcvtq_f32_u32(vmovl_u16(vget_low_u16(uints))), divby32768));
    vst1q_f32(dst+4, vmlaq_f32(negone, vcvtq_f32_u32(vmovl_u16(vget_high_u16(uints))), divby32768));
/* S32 -> F32 NEON fragment: arithmetic shift right by 8 recovers the
 * signed 24-bit payload, scaled by 1/8388607; walks forwards (shrinking
 * in-place conversion). */
1052 float *
dst = (
float *) cvt->
buf;
1067 const float32x4_t divby8388607 = vdupq_n_f32(
DIVBY8388607);
1071 vst1q_f32(
dst, vmulq_f32(vcvtq_f32_s32(vshrq_n_s32(vld1q_s32(mmsrc), 8)), divby8388607));
1072 i -= 4; mmsrc += 4;
dst += 4;
/* --- NEON F32 -> integer converter fragments ---
 * Same shape as the SSE2 versions: scalar clamp head, aligned SIMD loop,
 * scalar tail.  NOTE(review): unlike SSE2's saturating packs, vmovn_*
 * truncates -- correctness relies on the preceding vmin/vmax clamp. */
/* F32 -> S8 NEON (scalar head). */
1091 const float *
src = (
const float *) cvt->
buf;
1099 const float sample = *
src;
1100 if (sample >= 1.0
f) {
1102 }
else if (sample <= -1.0
f) {
/* F32 -> S8 NEON aligned loop: clamp, scale by 127, narrow 32 -> 16 -> 8. */
1112 if ((((
size_t)
src) & 15) == 0) {
1114 const float32x4_t
one = vdupq_n_f32(1.0
f);
1115 const float32x4_t negone = vdupq_n_f32(-1.0
f);
1116 const float32x4_t mulby127 = vdupq_n_f32(127.0
f);
1119 const int32x4_t ints1 = vcvtq_s32_f32(vmulq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(
src)),
one), mulby127));
1120 const int32x4_t ints2 = vcvtq_s32_f32(vmulq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(
src+4)),
one), mulby127));
1121 const int32x4_t ints3 = vcvtq_s32_f32(vmulq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(
src+8)),
one), mulby127));
1122 const int32x4_t ints4 = vcvtq_s32_f32(vmulq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(
src+12)),
one), mulby127));
1123 const int8x8_t i8lo = vmovn_s16(vcombine_s16(vmovn_s32(ints1), vmovn_s32(ints2)));
1124 const int8x8_t i8hi = vmovn_s16(vcombine_s16(vmovn_s32(ints3), vmovn_s32(ints4)));
1125 vst1q_s8(mmdst, vcombine_s8(i8lo, i8hi));
1126 i -= 16;
src += 16; mmdst += 16;
/* F32 -> S8 NEON (scalar tail). */
1133 const float sample = *
src;
1134 if (sample >= 1.0
f) {
1136 }
else if (sample <= -1.0
f) {
/* F32 -> U8 NEON (scalar head): (sample + 1) * 127 maps to [0, 254]. */
1153 const float *
src = (
const float *) cvt->
buf;
1161 const float sample = *
src;
1162 if (sample >= 1.0
f) {
1164 }
else if (sample <= -1.0
f) {
1167 *
dst = (
Uint8)((sample + 1.0
f) * 127.0f);
/* F32 -> U8 NEON aligned loop: add 1 before scaling, narrow unsigned. */
1174 if ((((
size_t)
src) & 15) == 0) {
1176 const float32x4_t
one = vdupq_n_f32(1.0
f);
1177 const float32x4_t negone = vdupq_n_f32(-1.0
f);
1178 const float32x4_t mulby127 = vdupq_n_f32(127.0
f);
1181 const uint32x4_t uints1 = vcvtq_u32_f32(vmulq_f32(vaddq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(
src)),
one),
one), mulby127));
1182 const uint32x4_t uints2 = vcvtq_u32_f32(vmulq_f32(vaddq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(
src+4)),
one),
one), mulby127));
1183 const uint32x4_t uints3 = vcvtq_u32_f32(vmulq_f32(vaddq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(
src+8)),
one),
one), mulby127));
1184 const uint32x4_t uints4 = vcvtq_u32_f32(vmulq_f32(vaddq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(
src+12)),
one),
one), mulby127));
1185 const uint8x8_t ui8lo = vmovn_u16(vcombine_u16(vmovn_u32(uints1), vmovn_u32(uints2)));
1186 const uint8x8_t ui8hi = vmovn_u16(vcombine_u16(vmovn_u32(uints3), vmovn_u32(uints4)));
1187 vst1q_u8(mmdst, vcombine_u8(ui8lo, ui8hi));
1188 i -= 16;
src += 16; mmdst += 16;
/* F32 -> U8 NEON (scalar tail). */
1196 const float sample = *
src;
1197 if (sample >= 1.0
f) {
1199 }
else if (sample <= -1.0
f) {
1202 *
dst = (
Uint8)((sample + 1.0
f) * 127.0f);
/* F32 -> S16 NEON (scalar head). */
1216 const float *
src = (
const float *) cvt->
buf;
1224 const float sample = *
src;
1225 if (sample >= 1.0
f) {
1227 }
else if (sample <= -1.0
f) {
/* F32 -> S16 NEON aligned loop: scale by 32767, narrow 32 -> 16. */
1237 if ((((
size_t)
src) & 15) == 0) {
1239 const float32x4_t
one = vdupq_n_f32(1.0
f);
1240 const float32x4_t negone = vdupq_n_f32(-1.0
f);
1241 const float32x4_t mulby32767 = vdupq_n_f32(32767.0
f);
1244 const int32x4_t ints1 = vcvtq_s32_f32(vmulq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(
src)),
one), mulby32767));
1245 const int32x4_t ints2 = vcvtq_s32_f32(vmulq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(
src+4)),
one), mulby32767));
1246 vst1q_s16(mmdst, vcombine_s16(vmovn_s32(ints1), vmovn_s32(ints2)));
1247 i -= 8;
src += 8; mmdst += 8;
/* F32 -> S16 NEON (scalar tail). */
1254 const float sample = *
src;
1255 if (sample >= 1.0
f) {
1257 }
else if (sample <= -1.0
f) {
/* F32 -> U16 NEON (scalar head). */
1274 const float *
src = (
const float *) cvt->
buf;
1282 const float sample = *
src;
1283 if (sample >= 1.0
f) {
1285 }
else if (sample <= -1.0
f) {
/* F32 -> U16 NEON aligned loop: (sample + 1) * 32767 maps to [0, 65534],
 * then narrow unsigned 32 -> 16. */
1295 if ((((
size_t)
src) & 15) == 0) {
1297 const float32x4_t
one = vdupq_n_f32(1.0
f);
1298 const float32x4_t negone = vdupq_n_f32(-1.0
f);
1299 const float32x4_t mulby32767 = vdupq_n_f32(32767.0
f);
1302 const uint32x4_t uints1 = vcvtq_u32_f32(vmulq_f32(vaddq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(
src)),
one),
one), mulby32767));
1303 const uint32x4_t uints2 = vcvtq_u32_f32(vmulq_f32(vaddq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(
src+4)),
one),
one), mulby32767));
1304 vst1q_u16(mmdst, vcombine_u16(vmovn_u32(uints1), vmovn_u32(uints2)));
1305 i -= 8;
src += 8; mmdst += 8;
/* F32 -> U16 NEON (scalar tail). */
1312 const float sample = *
src;
1313 if (sample >= 1.0
f) {
1315 }
else if (sample <= -1.0
f) {
/* F32 -> S32 NEON (scalar head). */
1332 const float *
src = (
const float *) cvt->
buf;
1340 const float sample = *
src;
1341 if (sample >= 1.0
f) {
1343 }
else if (sample <= -1.0
f) {
/* F32 -> S32 NEON aligned loop: scale by 8388607 (24-bit range) and shift
 * left 8 to place the payload in the top 24 bits. */
1355 const float32x4_t
one = vdupq_n_f32(1.0
f);
1356 const float32x4_t negone = vdupq_n_f32(-1.0
f);
1357 const float32x4_t mulby8388607 = vdupq_n_f32(8388607.0
f);
1360 vst1q_s32(mmdst, vshlq_n_s32(vcvtq_s32_f32(vmulq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(
src)),
one), mulby8388607)), 8));
1361 i -= 4;
src += 4; mmdst += 4;
/* F32 -> S32 NEON (scalar tail). */
1368 const float sample = *
src;
1369 if (sample >= 1.0
f) {
1371 }
else if (sample <= -1.0
f) {
/* --- SDL_ChooseAudioConverters fragment ---
 * Picks one backend (SSE2 preferred, then NEON, then the scalar fallback
 * -- the #if bodies are elided here) and wires all ten converter function
 * pointers to that backend's implementations.  Early-out if a previous
 * call already chose. */
1391 if (converters_chosen) {
/* Assigns every converter pointer in one shot for the chosen backend. */
1395 #define SET_CONVERTER_FUNCS(fntype) \
1396 SDL_Convert_S8_to_F32 = SDL_Convert_S8_to_F32_##fntype; \
1397 SDL_Convert_U8_to_F32 = SDL_Convert_U8_to_F32_##fntype; \
1398 SDL_Convert_S16_to_F32 = SDL_Convert_S16_to_F32_##fntype; \
1399 SDL_Convert_U16_to_F32 = SDL_Convert_U16_to_F32_##fntype; \
1400 SDL_Convert_S32_to_F32 = SDL_Convert_S32_to_F32_##fntype; \
1401 SDL_Convert_F32_to_S8 = SDL_Convert_F32_to_S8_##fntype; \
1402 SDL_Convert_F32_to_U8 = SDL_Convert_F32_to_U8_##fntype; \
1403 SDL_Convert_F32_to_S16 = SDL_Convert_F32_to_S16_##fntype; \
1404 SDL_Convert_F32_to_U16 = SDL_Convert_F32_to_U16_##fntype; \
1405 SDL_Convert_F32_to_S32 = SDL_Convert_F32_to_S32_##fntype; \
1406 converters_chosen = SDL_TRUE
/* Backend selection order: SSE2, then NEON, then scalar fallbacks. */
1408 #if HAVE_SSE2_INTRINSICS
1415 #if HAVE_NEON_INTRINSICS
1422 #if NEED_SCALAR_CONVERTER_FALLBACKS
1426 #undef SET_CONVERTER_FUNCS