22 #include "../SDL_internal.h" 29 #define HAVE_NEON_INTRINSICS 1 33 #define HAVE_SSE2_INTRINSICS 1 36 #if defined(__x86_64__) && HAVE_SSE2_INTRINSICS 37 #define NEED_SCALAR_CONVERTER_FALLBACKS 0 38 #elif __MACOSX__ && HAVE_SSE2_INTRINSICS 39 #define NEED_SCALAR_CONVERTER_FALLBACKS 0 40 #elif defined(__ARM_ARCH) && (__ARM_ARCH >= 8) && HAVE_NEON_INTRINSICS 41 #define NEED_SCALAR_CONVERTER_FALLBACKS 0 42 #elif defined(__APPLE__) && defined(__ARM_ARCH) && (__ARM_ARCH >= 7) && HAVE_NEON_INTRINSICS 43 #define NEED_SCALAR_CONVERTER_FALLBACKS 0 47 #ifndef NEED_SCALAR_CONVERTER_FALLBACKS 48 #define NEED_SCALAR_CONVERTER_FALLBACKS 1 64 #define DIVBY128 0.0078125f 65 #define DIVBY32768 0.000030517578125f 66 #define DIVBY8388607 0.00000011920930376163766f 69 #if NEED_SCALAR_CONVERTER_FALLBACKS 79 for (i = cvt->
len_cvt; i; --i, --src, --dst) {
98 for (i = cvt->
len_cvt; i; --i, --src, --dst) {
99 *dst = (((float) *src) *
DIVBY128) - 1.0
f;
150 float *
dst = (
float *) cvt->
buf;
167 const float *
src = (
const float *) cvt->
buf;
174 const float sample = *
src;
175 if (sample >= 1.0
f) {
177 }
else if (sample <= -1.0
f) {
193 const float *
src = (
const float *) cvt->
buf;
200 const float sample = *
src;
201 if (sample >= 1.0
f) {
203 }
else if (sample <= -1.0
f) {
206 *
dst = (
Uint8)((sample + 1.0
f) * 127.0f);
219 const float *
src = (
const float *) cvt->
buf;
226 const float sample = *
src;
227 if (sample >= 1.0
f) {
229 }
else if (sample <= -1.0
f) {
245 const float *
src = (
const float *) cvt->
buf;
252 const float sample = *
src;
253 if (sample >= 1.0
f) {
255 }
else if (sample <= -1.0
f) {
271 const float *
src = (
const float *) cvt->
buf;
278 const float sample = *
src;
279 if (sample >= 1.0
f) {
281 }
else if (sample <= -1.0
f) {
295 #if HAVE_SSE2_INTRINSICS 306 for (i = cvt->
len_cvt; i && (((
size_t) (dst-15)) & 15); --i, --src, --dst) {
310 src -= 15; dst -= 15;
311 SDL_assert(!i || ((((
size_t) dst) & 15) == 0));
314 if ((((
size_t) src) & 15) == 0) {
316 const __m128i *mmsrc = (
const __m128i *) src;
317 const __m128i
zero = _mm_setzero_si128();
318 const __m128 divby128 = _mm_set1_ps(
DIVBY128);
320 const __m128i bytes = _mm_load_si128(mmsrc);
322 const __m128i shorts1 = _mm_srai_epi16(_mm_slli_epi16(bytes, 8), 8);
324 const __m128i shorts2 = _mm_srai_epi16(bytes, 8);
326 const __m128 floats1 = _mm_mul_ps(_mm_cvtepi32_ps(_mm_srai_epi32(_mm_slli_epi32(_mm_unpacklo_epi16(shorts1, zero), 16), 16)), divby128);
327 const __m128 floats2 = _mm_mul_ps(_mm_cvtepi32_ps(_mm_srai_epi32(_mm_slli_epi32(_mm_unpacklo_epi16(shorts2, zero), 16), 16)), divby128);
328 const __m128 floats3 = _mm_mul_ps(_mm_cvtepi32_ps(_mm_srai_epi32(_mm_slli_epi32(_mm_unpackhi_epi16(shorts1, zero), 16), 16)), divby128);
329 const __m128 floats4 = _mm_mul_ps(_mm_cvtepi32_ps(_mm_srai_epi32(_mm_slli_epi32(_mm_unpackhi_epi16(shorts2, zero), 16), 16)), divby128);
331 _mm_store_ps(dst, _mm_unpacklo_ps(floats1, floats2));
332 _mm_store_ps(dst+4, _mm_unpackhi_ps(floats1, floats2));
333 _mm_store_ps(dst+8, _mm_unpacklo_ps(floats3, floats4));
334 _mm_store_ps(dst+12, _mm_unpackhi_ps(floats3, floats4));
335 i -= 16; mmsrc--; dst -= 16;
338 src = (
const Sint8 *) mmsrc;
341 src += 15; dst += 15;
365 for (i = cvt->
len_cvt; i && (((
size_t) (dst-15)) & 15); --i, --src, --dst) {
366 *dst = (((float) *src) *
DIVBY128) - 1.0
f;
369 src -= 15; dst -= 15;
370 SDL_assert(!i || ((((
size_t) dst) & 15) == 0));
373 if ((((
size_t) src) & 15) == 0) {
375 const __m128i *mmsrc = (
const __m128i *) src;
376 const __m128i
zero = _mm_setzero_si128();
377 const __m128 divby128 = _mm_set1_ps(
DIVBY128);
378 const __m128 minus1 = _mm_set1_ps(-1.0
f);
380 const __m128i bytes = _mm_load_si128(mmsrc);
382 const __m128i shorts1 = _mm_srli_epi16(_mm_slli_epi16(bytes, 8), 8);
384 const __m128i shorts2 = _mm_srli_epi16(bytes, 8);
387 const __m128 floats1 = _mm_add_ps(_mm_mul_ps(_mm_cvtepi32_ps(_mm_unpacklo_epi16(shorts1, zero)), divby128), minus1);
388 const __m128 floats2 = _mm_add_ps(_mm_mul_ps(_mm_cvtepi32_ps(_mm_unpacklo_epi16(shorts2, zero)), divby128), minus1);
389 const __m128 floats3 = _mm_add_ps(_mm_mul_ps(_mm_cvtepi32_ps(_mm_unpackhi_epi16(shorts1, zero)), divby128), minus1);
390 const __m128 floats4 = _mm_add_ps(_mm_mul_ps(_mm_cvtepi32_ps(_mm_unpackhi_epi16(shorts2, zero)), divby128), minus1);
392 _mm_store_ps(dst, _mm_unpacklo_ps(floats1, floats2));
393 _mm_store_ps(dst+4, _mm_unpackhi_ps(floats1, floats2));
394 _mm_store_ps(dst+8, _mm_unpacklo_ps(floats3, floats4));
395 _mm_store_ps(dst+12, _mm_unpackhi_ps(floats3, floats4));
396 i -= 16; mmsrc--; dst -= 16;
399 src = (
const Uint8 *) mmsrc;
402 src += 15; dst += 15;
406 *dst = (((float) *src) *
DIVBY128) - 1.0
f;
431 SDL_assert(!i || ((((
size_t) dst) & 15) == 0));
434 if ((((
size_t) src) & 15) == 0) {
436 const __m128 divby32768 = _mm_set1_ps(
DIVBY32768);
438 const __m128i ints = _mm_load_si128((__m128i
const *) src);
440 const __m128i
a = _mm_srai_epi32(_mm_slli_epi32(ints, 16), 16);
442 const __m128i
b = _mm_srai_epi32(ints, 16);
444 _mm_store_ps(dst, _mm_mul_ps(_mm_cvtepi32_ps(_mm_unpacklo_epi32(a, b)), divby32768));
445 _mm_store_ps(dst+4, _mm_mul_ps(_mm_cvtepi32_ps(_mm_unpackhi_epi32(a, b)), divby32768));
446 i -= 8; src -= 8; dst -= 8;
479 SDL_assert(!i || ((((
size_t) dst) & 15) == 0));
482 if ((((
size_t) src) & 15) == 0) {
484 const __m128 divby32768 = _mm_set1_ps(
DIVBY32768);
485 const __m128 minus1 = _mm_set1_ps(1.0
f);
487 const __m128i ints = _mm_load_si128((__m128i
const *) src);
489 const __m128i
a = _mm_srli_epi32(_mm_slli_epi32(ints, 16), 16);
491 const __m128i
b = _mm_srli_epi32(ints, 16);
493 _mm_store_ps(dst, _mm_add_ps(_mm_mul_ps(_mm_cvtepi32_ps(_mm_unpacklo_epi32(a, b)), divby32768), minus1));
494 _mm_store_ps(dst+4, _mm_add_ps(_mm_mul_ps(_mm_cvtepi32_ps(_mm_unpackhi_epi32(a, b)), divby32768), minus1));
495 i -= 8; src -= 8; dst -= 8;
517 float *
dst = (
float *) cvt->
buf;
527 SDL_assert(!i || ((((
size_t) dst) & 15) == 0));
530 if ((((
size_t) src) & 15) == 0) {
533 const __m128i *mmsrc = (
const __m128i *) src;
536 _mm_store_ps(dst, _mm_mul_ps(_mm_cvtepi32_ps(_mm_srai_epi32(_mm_load_si128(mmsrc), 8)), divby8388607));
537 i -= 4; mmsrc++; dst += 4;
539 src = (
const Sint32 *) mmsrc;
556 const float *
src = (
const float *) cvt->
buf;
564 const float sample = *
src;
565 if (sample >= 1.0
f) {
567 }
else if (sample <= -1.0
f) {
570 *dst = (
Sint8)(sample * 127.0
f);
574 SDL_assert(!i || ((((
size_t) dst) & 15) == 0));
577 if ((((
size_t) src) & 15) == 0) {
579 const __m128
one = _mm_set1_ps(1.0
f);
580 const __m128 negone = _mm_set1_ps(-1.0
f);
581 const __m128 mulby127 = _mm_set1_ps(127.0
f);
582 __m128i *mmdst = (__m128i *) dst;
584 const __m128i ints1 = _mm_cvtps_epi32(_mm_mul_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(src)), one), mulby127));
585 const __m128i ints2 = _mm_cvtps_epi32(_mm_mul_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(src+4)), one), mulby127));
586 const __m128i ints3 = _mm_cvtps_epi32(_mm_mul_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(src+8)), one), mulby127));
587 const __m128i ints4 = _mm_cvtps_epi32(_mm_mul_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(src+12)), one), mulby127));
588 _mm_store_si128(mmdst, _mm_packs_epi16(_mm_packs_epi32(ints1, ints2), _mm_packs_epi32(ints3, ints4)));
589 i -= 16; src += 16; mmdst++;
591 dst = (
Sint8 *) mmdst;
596 const float sample = *
src;
597 if (sample >= 1.0
f) {
599 }
else if (sample <= -1.0
f) {
602 *dst = (
Sint8)(sample * 127.0
f);
616 const float *
src = (
const float *) cvt->
buf;
624 const float sample = *
src;
625 if (sample >= 1.0
f) {
627 }
else if (sample <= -1.0
f) {
630 *dst = (
Uint8)((sample + 1.0
f) * 127.0f);
634 SDL_assert(!i || ((((
size_t) dst) & 15) == 0));
637 if ((((
size_t) src) & 15) == 0) {
639 const __m128
one = _mm_set1_ps(1.0
f);
640 const __m128 negone = _mm_set1_ps(-1.0
f);
641 const __m128 mulby127 = _mm_set1_ps(127.0
f);
642 __m128i *mmdst = (__m128i *) dst;
644 const __m128i ints1 = _mm_cvtps_epi32(_mm_mul_ps(_mm_add_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(src)), one), one), mulby127));
645 const __m128i ints2 = _mm_cvtps_epi32(_mm_mul_ps(_mm_add_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(src+4)), one), one), mulby127));
646 const __m128i ints3 = _mm_cvtps_epi32(_mm_mul_ps(_mm_add_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(src+8)), one), one), mulby127));
647 const __m128i ints4 = _mm_cvtps_epi32(_mm_mul_ps(_mm_add_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(src+12)), one), one), mulby127));
648 _mm_store_si128(mmdst, _mm_packus_epi16(_mm_packs_epi32(ints1, ints2), _mm_packs_epi32(ints3, ints4)));
649 i -= 16; src += 16; mmdst++;
651 dst = (
Uint8 *) mmdst;
656 const float sample = *
src;
657 if (sample >= 1.0
f) {
659 }
else if (sample <= -1.0
f) {
662 *dst = (
Uint8)((sample + 1.0
f) * 127.0f);
676 const float *
src = (
const float *) cvt->
buf;
684 const float sample = *
src;
685 if (sample >= 1.0
f) {
687 }
else if (sample <= -1.0
f) {
690 *dst = (
Sint16)(sample * 32767.0
f);
694 SDL_assert(!i || ((((
size_t) dst) & 15) == 0));
697 if ((((
size_t) src) & 15) == 0) {
699 const __m128
one = _mm_set1_ps(1.0
f);
700 const __m128 negone = _mm_set1_ps(-1.0
f);
701 const __m128 mulby32767 = _mm_set1_ps(32767.0
f);
702 __m128i *mmdst = (__m128i *) dst;
704 const __m128i ints1 = _mm_cvtps_epi32(_mm_mul_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(src)), one), mulby32767));
705 const __m128i ints2 = _mm_cvtps_epi32(_mm_mul_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(src+4)), one), mulby32767));
706 _mm_store_si128(mmdst, _mm_packs_epi32(ints1, ints2));
707 i -= 8; src += 8; mmdst++;
714 const float sample = *
src;
715 if (sample >= 1.0
f) {
717 }
else if (sample <= -1.0
f) {
720 *dst = (
Sint16)(sample * 32767.0
f);
734 const float *
src = (
const float *) cvt->
buf;
742 const float sample = *
src;
743 if (sample >= 1.0
f) {
745 }
else if (sample <= -1.0
f) {
748 *dst = (
Uint16)((sample + 1.0
f) * 32767.0f);
752 SDL_assert(!i || ((((
size_t) dst) & 15) == 0));
755 if ((((
size_t) src) & 15) == 0) {
764 const __m128 mulby32767 = _mm_set1_ps(32767.0
f);
765 const __m128i topbit = _mm_set1_epi16(-32768);
766 const __m128
one = _mm_set1_ps(1.0
f);
767 const __m128 negone = _mm_set1_ps(-1.0
f);
768 __m128i *mmdst = (__m128i *) dst;
770 const __m128i ints1 = _mm_cvtps_epi32(_mm_mul_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(src)), one), mulby32767));
771 const __m128i ints2 = _mm_cvtps_epi32(_mm_mul_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(src+4)), one), mulby32767));
772 _mm_store_si128(mmdst, _mm_xor_si128(_mm_packs_epi32(ints1, ints2), topbit));
773 i -= 8; src += 8; mmdst++;
780 const float sample = *
src;
781 if (sample >= 1.0
f) {
783 }
else if (sample <= -1.0
f) {
786 *dst = (
Uint16)((sample + 1.0
f) * 32767.0f);
800 const float *
src = (
const float *) cvt->
buf;
808 const float sample = *
src;
809 if (sample >= 1.0
f) {
811 }
else if (sample <= -1.0
f) {
812 *dst = (
Sint32) -2147483648LL;
814 *dst = ((
Sint32)(sample * 8388607.0
f)) << 8;
818 SDL_assert(!i || ((((
size_t) dst) & 15) == 0));
819 SDL_assert(!i || ((((
size_t) src) & 15) == 0));
823 const __m128
one = _mm_set1_ps(1.0
f);
824 const __m128 negone = _mm_set1_ps(-1.0
f);
825 const __m128 mulby8388607 = _mm_set1_ps(8388607.0
f);
826 __m128i *mmdst = (__m128i *) dst;
828 _mm_store_si128(mmdst, _mm_slli_epi32(_mm_cvtps_epi32(_mm_mul_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(src)), one), mulby8388607)), 8));
829 i -= 4; src += 4; mmdst++;
836 const float sample = *
src;
837 if (sample >= 1.0
f) {
839 }
else if (sample <= -1.0
f) {
840 *dst = (
Sint32) -2147483648LL;
842 *dst = ((
Sint32)(sample * 8388607.0
f)) << 8;
854 #if HAVE_NEON_INTRINSICS 865 for (i = cvt->
len_cvt; i && (((
size_t) (dst-15)) & 15); --i, --src, --dst) {
869 src -= 15; dst -= 15;
870 SDL_assert(!i || ((((
size_t) dst) & 15) == 0));
873 if ((((
size_t) src) & 15) == 0) {
876 const float32x4_t divby128 = vdupq_n_f32(
DIVBY128);
878 const int8x16_t bytes = vld1q_s8(mmsrc);
879 const int16x8_t int16hi = vmovl_s8(vget_high_s8(bytes));
880 const int16x8_t int16lo = vmovl_s8(vget_low_s8(bytes));
882 vst1q_f32(dst, vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_high_s16(int16hi))), divby128));
883 vst1q_f32(dst+4, vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_low_s16(int16hi))), divby128));
884 vst1q_f32(dst+8, vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_high_s16(int16lo))), divby128));
885 vst1q_f32(dst+12, vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_low_s16(int16lo))), divby128));
886 i -= 16; mmsrc -= 16; dst -= 16;
889 src = (
const Sint8 *) mmsrc;
892 src += 15; dst += 15;
916 for (i = cvt->
len_cvt; i && (((
size_t) (dst-15)) & 15); --i, --src, --dst) {
917 *dst = (((float) *src) *
DIVBY128) - 1.0
f;
920 src -= 15; dst -= 15;
921 SDL_assert(!i || ((((
size_t) dst) & 15) == 0));
924 if ((((
size_t) src) & 15) == 0) {
927 const float32x4_t divby128 = vdupq_n_f32(
DIVBY128);
928 const float32x4_t negone = vdupq_n_f32(-1.0
f);
930 const uint8x16_t bytes = vld1q_u8(mmsrc);
931 const uint16x8_t uint16hi = vmovl_u8(vget_high_u8(bytes));
932 const uint16x8_t uint16lo = vmovl_u8(vget_low_u8(bytes));
934 vst1q_f32(dst, vmlaq_f32(negone, vcvtq_f32_u32(vmovl_u16(vget_high_u16(uint16hi))), divby128));
935 vst1q_f32(dst+4, vmlaq_f32(negone, vcvtq_f32_u32(vmovl_u16(vget_low_u16(uint16hi))), divby128));
936 vst1q_f32(dst+8, vmlaq_f32(negone, vcvtq_f32_u32(vmovl_u16(vget_high_u16(uint16lo))), divby128));
937 vst1q_f32(dst+12, vmlaq_f32(negone, vcvtq_f32_u32(vmovl_u16(vget_low_u16(uint16lo))), divby128));
938 i -= 16; mmsrc -= 16; dst -= 16;
941 src = (
const Uint8 *) mmsrc;
944 src += 15; dst += 15;
948 *dst = (((float) *src) *
DIVBY128) - 1.0
f;
973 SDL_assert(!i || ((((
size_t) dst) & 15) == 0));
976 if ((((
size_t) src) & 15) == 0) {
978 const float32x4_t divby32768 = vdupq_n_f32(
DIVBY32768);
980 const int16x8_t ints = vld1q_s16((
int16_t const *) src);
982 vst1q_f32(dst, vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_low_s16(ints))), divby32768));
983 vst1q_f32(dst+4, vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_high_s16(ints))), divby32768));
984 i -= 8; src -= 8; dst -= 8;
1017 SDL_assert(!i || ((((
size_t) dst) & 15) == 0));
1020 if ((((
size_t) src) & 15) == 0) {
1022 const float32x4_t divby32768 = vdupq_n_f32(
DIVBY32768);
1023 const float32x4_t negone = vdupq_n_f32(-1.0
f);
1025 const uint16x8_t uints = vld1q_u16((
uint16_t const *) src);
1027 vst1q_f32(dst, vmlaq_f32(negone, vcvtq_f32_u32(vmovl_u16(vget_low_u16(uints))), divby32768));
1028 vst1q_f32(dst+4, vmlaq_f32(negone, vcvtq_f32_u32(vmovl_u16(vget_high_u16(uints))), divby32768));
1029 i -= 8; src -= 8; dst -= 8;
1051 float *
dst = (
float *) cvt->
buf;
1061 SDL_assert(!i || ((((
size_t) dst) & 15) == 0));
1064 if ((((
size_t) src) & 15) == 0) {
1066 const float32x4_t divby8388607 = vdupq_n_f32(
DIVBY8388607);
1070 vst1q_f32(dst, vmulq_f32(vcvtq_f32_s32(vshrq_n_s32(vld1q_s32(mmsrc), 8)), divby8388607));
1071 i -= 4; mmsrc += 4; dst += 4;
1073 src = (
const Sint32 *) mmsrc;
1090 const float *
src = (
const float *) cvt->
buf;
1098 const float sample = *
src;
1099 if (sample >= 1.0
f) {
1101 }
else if (sample <= -1.0
f) {
1104 *dst = (
Sint8)(sample * 127.0
f);
1108 SDL_assert(!i || ((((
size_t) dst) & 15) == 0));
1111 if ((((
size_t) src) & 15) == 0) {
1113 const float32x4_t
one = vdupq_n_f32(1.0
f);
1114 const float32x4_t negone = vdupq_n_f32(-1.0
f);
1115 const float32x4_t mulby127 = vdupq_n_f32(127.0
f);
1118 const int32x4_t ints1 = vcvtq_s32_f32(vmulq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(src)), one), mulby127));
1119 const int32x4_t ints2 = vcvtq_s32_f32(vmulq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(src+4)), one), mulby127));
1120 const int32x4_t ints3 = vcvtq_s32_f32(vmulq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(src+8)), one), mulby127));
1121 const int32x4_t ints4 = vcvtq_s32_f32(vmulq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(src+12)), one), mulby127));
1122 const int8x8_t i8lo = vmovn_s16(vcombine_s16(vmovn_s32(ints1), vmovn_s32(ints2)));
1123 const int8x8_t i8hi = vmovn_s16(vcombine_s16(vmovn_s32(ints3), vmovn_s32(ints4)));
1124 vst1q_s8(mmdst, vcombine_s8(i8lo, i8hi));
1125 i -= 16; src += 16; mmdst += 16;
1127 dst = (
Sint8 *) mmdst;
1132 const float sample = *
src;
1133 if (sample >= 1.0
f) {
1135 }
else if (sample <= -1.0
f) {
1138 *dst = (
Sint8)(sample * 127.0
f);
1152 const float *
src = (
const float *) cvt->
buf;
1160 const float sample = *
src;
1161 if (sample >= 1.0
f) {
1163 }
else if (sample <= -1.0
f) {
1166 *dst = (
Uint8)((sample + 1.0
f) * 127.0f);
1170 SDL_assert(!i || ((((
size_t) dst) & 15) == 0));
1173 if ((((
size_t) src) & 15) == 0) {
1175 const float32x4_t
one = vdupq_n_f32(1.0
f);
1176 const float32x4_t negone = vdupq_n_f32(-1.0
f);
1177 const float32x4_t mulby127 = vdupq_n_f32(127.0
f);
1180 const uint32x4_t uints1 = vcvtq_u32_f32(vmulq_f32(vaddq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(src)), one), one), mulby127));
1181 const uint32x4_t uints2 = vcvtq_u32_f32(vmulq_f32(vaddq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(src+4)), one), one), mulby127));
1182 const uint32x4_t uints3 = vcvtq_u32_f32(vmulq_f32(vaddq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(src+8)), one), one), mulby127));
1183 const uint32x4_t uints4 = vcvtq_u32_f32(vmulq_f32(vaddq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(src+12)), one), one), mulby127));
1184 const uint8x8_t ui8lo = vmovn_u16(vcombine_u16(vmovn_u32(uints1), vmovn_u32(uints2)));
1185 const uint8x8_t ui8hi = vmovn_u16(vcombine_u16(vmovn_u32(uints3), vmovn_u32(uints4)));
1186 vst1q_u8(mmdst, vcombine_u8(ui8lo, ui8hi));
1187 i -= 16; src += 16; mmdst += 16;
1190 dst = (
Uint8 *) mmdst;
1195 const float sample = *
src;
1196 if (sample >= 1.0
f) {
1198 }
else if (sample <= -1.0
f) {
1201 *dst = (
Uint8)((sample + 1.0
f) * 127.0f);
1215 const float *
src = (
const float *) cvt->
buf;
1223 const float sample = *
src;
1224 if (sample >= 1.0
f) {
1226 }
else if (sample <= -1.0
f) {
1229 *dst = (
Sint16)(sample * 32767.0
f);
1233 SDL_assert(!i || ((((
size_t) dst) & 15) == 0));
1236 if ((((
size_t) src) & 15) == 0) {
1238 const float32x4_t
one = vdupq_n_f32(1.0
f);
1239 const float32x4_t negone = vdupq_n_f32(-1.0
f);
1240 const float32x4_t mulby32767 = vdupq_n_f32(32767.0
f);
1243 const int32x4_t ints1 = vcvtq_s32_f32(vmulq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(src)), one), mulby32767));
1244 const int32x4_t ints2 = vcvtq_s32_f32(vmulq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(src+4)), one), mulby32767));
1245 vst1q_s16(mmdst, vcombine_s16(vmovn_s32(ints1), vmovn_s32(ints2)));
1246 i -= 8; src += 8; mmdst += 8;
1253 const float sample = *
src;
1254 if (sample >= 1.0
f) {
1256 }
else if (sample <= -1.0
f) {
1259 *dst = (
Sint16)(sample * 32767.0
f);
1273 const float *
src = (
const float *) cvt->
buf;
1281 const float sample = *
src;
1282 if (sample >= 1.0
f) {
1284 }
else if (sample <= -1.0
f) {
1287 *dst = (
Uint16)((sample + 1.0
f) * 32767.0f);
1291 SDL_assert(!i || ((((
size_t) dst) & 15) == 0));
1294 if ((((
size_t) src) & 15) == 0) {
1296 const float32x4_t
one = vdupq_n_f32(1.0
f);
1297 const float32x4_t negone = vdupq_n_f32(-1.0
f);
1298 const float32x4_t mulby32767 = vdupq_n_f32(32767.0
f);
1301 const uint32x4_t uints1 = vcvtq_u32_f32(vmulq_f32(vaddq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(src)), one), one), mulby32767));
1302 const uint32x4_t uints2 = vcvtq_u32_f32(vmulq_f32(vaddq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(src+4)), one), one), mulby32767));
1303 vst1q_u16(mmdst, vcombine_u16(vmovn_u32(uints1), vmovn_u32(uints2)));
1304 i -= 8; src += 8; mmdst += 8;
1311 const float sample = *
src;
1312 if (sample >= 1.0
f) {
1314 }
else if (sample <= -1.0
f) {
1317 *dst = (
Uint16)((sample + 1.0
f) * 32767.0f);
1331 const float *
src = (
const float *) cvt->
buf;
1339 const float sample = *
src;
1340 if (sample >= 1.0
f) {
1342 }
else if (sample <= -1.0
f) {
1343 *dst = (-2147483647) - 1;
1345 *dst = ((
Sint32)(sample * 8388607.0
f)) << 8;
1349 SDL_assert(!i || ((((
size_t) dst) & 15) == 0));
1350 SDL_assert(!i || ((((
size_t) src) & 15) == 0));
1354 const float32x4_t
one = vdupq_n_f32(1.0
f);
1355 const float32x4_t negone = vdupq_n_f32(-1.0
f);
1356 const float32x4_t mulby8388607 = vdupq_n_f32(8388607.0
f);
1359 vst1q_s32(mmdst, vshlq_n_s32(vcvtq_s32_f32(vmulq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(src)), one), mulby8388607)), 8));
1360 i -= 4; src += 4; mmdst += 4;
1367 const float sample = *
src;
1368 if (sample >= 1.0
f) {
1370 }
else if (sample <= -1.0
f) {
1371 *dst = (-2147483647) - 1;
1373 *dst = ((
Sint32)(sample * 8388607.0
f)) << 8;
1390 if (converters_chosen) {
1394 #define SET_CONVERTER_FUNCS(fntype) \ 1395 SDL_Convert_S8_to_F32 = SDL_Convert_S8_to_F32_##fntype; \ 1396 SDL_Convert_U8_to_F32 = SDL_Convert_U8_to_F32_##fntype; \ 1397 SDL_Convert_S16_to_F32 = SDL_Convert_S16_to_F32_##fntype; \ 1398 SDL_Convert_U16_to_F32 = SDL_Convert_U16_to_F32_##fntype; \ 1399 SDL_Convert_S32_to_F32 = SDL_Convert_S32_to_F32_##fntype; \ 1400 SDL_Convert_F32_to_S8 = SDL_Convert_F32_to_S8_##fntype; \ 1401 SDL_Convert_F32_to_U8 = SDL_Convert_F32_to_U8_##fntype; \ 1402 SDL_Convert_F32_to_S16 = SDL_Convert_F32_to_S16_##fntype; \ 1403 SDL_Convert_F32_to_U16 = SDL_Convert_F32_to_U16_##fntype; \ 1404 SDL_Convert_F32_to_S32 = SDL_Convert_F32_to_S32_##fntype; \ 1405 converters_chosen = SDL_TRUE 1407 #if HAVE_SSE2_INTRINSICS 1414 #if HAVE_NEON_INTRINSICS 1421 #if NEED_SCALAR_CONVERTER_FALLBACKS 1425 #undef SET_CONVERTER_FUNCS #define LOG_DEBUG_CONVERT(from, to)
static void SDL_Convert_F32_to_S32_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
SDL_AudioFilter SDL_Convert_F32_to_U16
SDL_AudioFilter SDL_Convert_F32_to_S16
void SDL_ChooseAudioConverters(void)
SDL_AudioFilter SDL_Convert_U8_to_F32
static void SDL_Convert_S32_to_F32_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
Uint16 SDL_AudioFormat
Audio format flags.
SDL_AudioFilter SDL_Convert_F32_to_U8
A structure to hold a set of audio conversion filters and buffers.
static void SDL_Convert_U16_to_F32_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
SDL_AudioFilter SDL_Convert_S16_to_F32
GLint GLint GLsizei GLsizei GLsizei GLint GLenum format
SDL_AudioFilter filters[SDL_AUDIOCVT_MAX_FILTERS+1]
static void SDL_Convert_F32_to_U16_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
static void SDL_Convert_S16_to_F32_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
static void SDL_Convert_U8_to_F32_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
static void SDL_Convert_S8_to_F32_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
static void SDL_Convert_F32_to_S8_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
void(* SDL_AudioFilter)(struct SDL_AudioCVT *cvt, SDL_AudioFormat format)
return Display return Display Bool Bool int int int return Display XEvent Bool(*) XPointer return Display return Display Drawable _Xconst char unsigned int unsigned int return Display Pixmap Pixmap XColor XColor unsigned int unsigned int return Display _Xconst char char int char return Display Visual unsigned int int int char unsigned int unsigned int in i)
#define SDL_assert(condition)
static void SDL_Convert_F32_to_S16_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
SDL_AudioFilter SDL_Convert_S8_to_F32
SDL_AudioFilter SDL_Convert_F32_to_S32
SDL_AudioFilter SDL_Convert_F32_to_S8
static void SDL_Convert_F32_to_U8_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
SDL_AudioFilter SDL_Convert_U16_to_F32
GLboolean GLboolean GLboolean GLboolean a
SDL_AudioFilter SDL_Convert_S32_to_F32
GLboolean GLboolean GLboolean b
#define SET_CONVERTER_FUNCS(fntype)