SDL  2.0
SDL_audiotypecvt.c
Go to the documentation of this file.
1 /*
2  Simple DirectMedia Layer
3  Copyright (C) 1997-2019 Sam Lantinga <slouken@libsdl.org>
4 
5  This software is provided 'as-is', without any express or implied
6  warranty. In no event will the authors be held liable for any damages
7  arising from the use of this software.
8 
9  Permission is granted to anyone to use this software for any purpose,
10  including commercial applications, and to alter it and redistribute it
11  freely, subject to the following restrictions:
12 
13  1. The origin of this software must not be misrepresented; you must not
14  claim that you wrote the original software. If you use this software
15  in a product, an acknowledgment in the product documentation would be
16  appreciated but is not required.
17  2. Altered source versions must be plainly marked as such, and must not be
18  misrepresented as being the original software.
19  3. This notice may not be removed or altered from any source distribution.
20 */
21 
22 #include "../SDL_internal.h"
23 #include "SDL_audio.h"
24 #include "SDL_audio_c.h"
25 #include "SDL_cpuinfo.h"
26 #include "SDL_assert.h"
27 
28 #ifdef __ARM_NEON
29 #define HAVE_NEON_INTRINSICS 1
30 #endif
31 
32 #ifdef __SSE2__
33 #define HAVE_SSE2_INTRINSICS 1
34 #endif
35 
36 #if defined(__x86_64__) && HAVE_SSE2_INTRINSICS
37 #define NEED_SCALAR_CONVERTER_FALLBACKS 0 /* x86_64 guarantees SSE2. */
38 #elif __MACOSX__ && HAVE_SSE2_INTRINSICS
39 #define NEED_SCALAR_CONVERTER_FALLBACKS 0 /* Mac OS X/Intel guarantees SSE2. */
40 #elif defined(__ARM_ARCH) && (__ARM_ARCH >= 8) && HAVE_NEON_INTRINSICS
41 #define NEED_SCALAR_CONVERTER_FALLBACKS 0 /* ARMv8+ promise NEON. */
42 #elif defined(__APPLE__) && defined(__ARM_ARCH) && (__ARM_ARCH >= 7) && HAVE_NEON_INTRINSICS
43 #define NEED_SCALAR_CONVERTER_FALLBACKS 0 /* All Apple ARMv7 chips promise NEON support. */
44 #endif
45 
46 /* Set to zero if platform is guaranteed to use a SIMD codepath here. */
47 #ifndef NEED_SCALAR_CONVERTER_FALLBACKS
48 #define NEED_SCALAR_CONVERTER_FALLBACKS 1
49 #endif
50 
51 /* Function pointers set to a CPU-specific implementation. */
62 
63 
64 #define DIVBY128 0.0078125f
65 #define DIVBY32768 0.000030517578125f
66 #define DIVBY8388607 0.00000011920930376163766f
67 
68 
69 #if NEED_SCALAR_CONVERTER_FALLBACKS
70 static void SDLCALL
72 {
73  const Sint8 *src = ((const Sint8 *) (cvt->buf + cvt->len_cvt)) - 1;
74  float *dst = ((float *) (cvt->buf + cvt->len_cvt * 4)) - 1;
75  int i;
76 
77  LOG_DEBUG_CONVERT("AUDIO_S8", "AUDIO_F32");
78 
79  for (i = cvt->len_cvt; i; --i, --src, --dst) {
80  *dst = ((float) *src) * DIVBY128;
81  }
82 
83  cvt->len_cvt *= 4;
84  if (cvt->filters[++cvt->filter_index]) {
85  cvt->filters[cvt->filter_index](cvt, AUDIO_F32SYS);
86  }
87 }
88 
89 static void SDLCALL
91 {
92  const Uint8 *src = ((const Uint8 *) (cvt->buf + cvt->len_cvt)) - 1;
93  float *dst = ((float *) (cvt->buf + cvt->len_cvt * 4)) - 1;
94  int i;
95 
96  LOG_DEBUG_CONVERT("AUDIO_U8", "AUDIO_F32");
97 
98  for (i = cvt->len_cvt; i; --i, --src, --dst) {
99  *dst = (((float) *src) * DIVBY128) - 1.0f;
100  }
101 
102  cvt->len_cvt *= 4;
103  if (cvt->filters[++cvt->filter_index]) {
104  cvt->filters[cvt->filter_index](cvt, AUDIO_F32SYS);
105  }
106 }
107 
108 static void SDLCALL
110 {
111  const Sint16 *src = ((const Sint16 *) (cvt->buf + cvt->len_cvt)) - 1;
112  float *dst = ((float *) (cvt->buf + cvt->len_cvt * 2)) - 1;
113  int i;
114 
115  LOG_DEBUG_CONVERT("AUDIO_S16", "AUDIO_F32");
116 
117  for (i = cvt->len_cvt / sizeof (Sint16); i; --i, --src, --dst) {
118  *dst = ((float) *src) * DIVBY32768;
119  }
120 
121  cvt->len_cvt *= 2;
122  if (cvt->filters[++cvt->filter_index]) {
123  cvt->filters[cvt->filter_index](cvt, AUDIO_F32SYS);
124  }
125 }
126 
127 static void SDLCALL
129 {
130  const Uint16 *src = ((const Uint16 *) (cvt->buf + cvt->len_cvt)) - 1;
131  float *dst = ((float *) (cvt->buf + cvt->len_cvt * 2)) - 1;
132  int i;
133 
134  LOG_DEBUG_CONVERT("AUDIO_U16", "AUDIO_F32");
135 
136  for (i = cvt->len_cvt / sizeof (Uint16); i; --i, --src, --dst) {
137  *dst = (((float) *src) * DIVBY32768) - 1.0f;
138  }
139 
140  cvt->len_cvt *= 2;
141  if (cvt->filters[++cvt->filter_index]) {
142  cvt->filters[cvt->filter_index](cvt, AUDIO_F32SYS);
143  }
144 }
145 
146 static void SDLCALL
148 {
149  const Sint32 *src = (const Sint32 *) cvt->buf;
150  float *dst = (float *) cvt->buf;
151  int i;
152 
153  LOG_DEBUG_CONVERT("AUDIO_S32", "AUDIO_F32");
154 
155  for (i = cvt->len_cvt / sizeof (Sint32); i; --i, ++src, ++dst) {
156  *dst = ((float) (*src>>8)) * DIVBY8388607;
157  }
158 
159  if (cvt->filters[++cvt->filter_index]) {
160  cvt->filters[cvt->filter_index](cvt, AUDIO_F32SYS);
161  }
162 }
163 
164 static void SDLCALL
166 {
167  const float *src = (const float *) cvt->buf;
168  Sint8 *dst = (Sint8 *) cvt->buf;
169  int i;
170 
171  LOG_DEBUG_CONVERT("AUDIO_F32", "AUDIO_S8");
172 
173  for (i = cvt->len_cvt / sizeof (float); i; --i, ++src, ++dst) {
174  const float sample = *src;
175  if (sample >= 1.0f) {
176  *dst = 127;
177  } else if (sample <= -1.0f) {
178  *dst = -128;
179  } else {
180  *dst = (Sint8)(sample * 127.0f);
181  }
182  }
183 
184  cvt->len_cvt /= 4;
185  if (cvt->filters[++cvt->filter_index]) {
186  cvt->filters[cvt->filter_index](cvt, AUDIO_S8);
187  }
188 }
189 
190 static void SDLCALL
192 {
193  const float *src = (const float *) cvt->buf;
194  Uint8 *dst = (Uint8 *) cvt->buf;
195  int i;
196 
197  LOG_DEBUG_CONVERT("AUDIO_F32", "AUDIO_U8");
198 
199  for (i = cvt->len_cvt / sizeof (float); i; --i, ++src, ++dst) {
200  const float sample = *src;
201  if (sample >= 1.0f) {
202  *dst = 255;
203  } else if (sample <= -1.0f) {
204  *dst = 0;
205  } else {
206  *dst = (Uint8)((sample + 1.0f) * 127.0f);
207  }
208  }
209 
210  cvt->len_cvt /= 4;
211  if (cvt->filters[++cvt->filter_index]) {
212  cvt->filters[cvt->filter_index](cvt, AUDIO_U8);
213  }
214 }
215 
216 static void SDLCALL
218 {
219  const float *src = (const float *) cvt->buf;
220  Sint16 *dst = (Sint16 *) cvt->buf;
221  int i;
222 
223  LOG_DEBUG_CONVERT("AUDIO_F32", "AUDIO_S16");
224 
225  for (i = cvt->len_cvt / sizeof (float); i; --i, ++src, ++dst) {
226  const float sample = *src;
227  if (sample >= 1.0f) {
228  *dst = 32767;
229  } else if (sample <= -1.0f) {
230  *dst = -32768;
231  } else {
232  *dst = (Sint16)(sample * 32767.0f);
233  }
234  }
235 
236  cvt->len_cvt /= 2;
237  if (cvt->filters[++cvt->filter_index]) {
238  cvt->filters[cvt->filter_index](cvt, AUDIO_S16SYS);
239  }
240 }
241 
242 static void SDLCALL
244 {
245  const float *src = (const float *) cvt->buf;
246  Uint16 *dst = (Uint16 *) cvt->buf;
247  int i;
248 
249  LOG_DEBUG_CONVERT("AUDIO_F32", "AUDIO_U16");
250 
251  for (i = cvt->len_cvt / sizeof (float); i; --i, ++src, ++dst) {
252  const float sample = *src;
253  if (sample >= 1.0f) {
254  *dst = 65535;
255  } else if (sample <= -1.0f) {
256  *dst = 0;
257  } else {
258  *dst = (Uint16)((sample + 1.0f) * 32767.0f);
259  }
260  }
261 
262  cvt->len_cvt /= 2;
263  if (cvt->filters[++cvt->filter_index]) {
264  cvt->filters[cvt->filter_index](cvt, AUDIO_U16SYS);
265  }
266 }
267 
268 static void SDLCALL
270 {
271  const float *src = (const float *) cvt->buf;
272  Sint32 *dst = (Sint32 *) cvt->buf;
273  int i;
274 
275  LOG_DEBUG_CONVERT("AUDIO_F32", "AUDIO_S32");
276 
277  for (i = cvt->len_cvt / sizeof (float); i; --i, ++src, ++dst) {
278  const float sample = *src;
279  if (sample >= 1.0f) {
280  *dst = 2147483647;
281  } else if (sample <= -1.0f) {
282  *dst = (Sint32) -2147483648LL;
283  } else {
284  *dst = ((Sint32)(sample * 8388607.0f)) << 8;
285  }
286  }
287 
288  if (cvt->filters[++cvt->filter_index]) {
289  cvt->filters[cvt->filter_index](cvt, AUDIO_S32SYS);
290  }
291 }
292 #endif
293 
294 
295 #if HAVE_SSE2_INTRINSICS
296 static void SDLCALL
297 SDL_Convert_S8_to_F32_SSE2(SDL_AudioCVT *cvt, SDL_AudioFormat format)
298 {
299  const Sint8 *src = ((const Sint8 *) (cvt->buf + cvt->len_cvt)) - 1;
300  float *dst = ((float *) (cvt->buf + cvt->len_cvt * 4)) - 1;
301  int i;
302 
303  LOG_DEBUG_CONVERT("AUDIO_S8", "AUDIO_F32 (using SSE2)");
304 
305  /* Get dst aligned to 16 bytes (since buffer is growing, we don't have to worry about overreading from src) */
306  for (i = cvt->len_cvt; i && (((size_t) (dst-15)) & 15); --i, --src, --dst) {
307  *dst = ((float) *src) * DIVBY128;
308  }
309 
310  src -= 15; dst -= 15; /* adjust to read SSE blocks from the start. */
311  SDL_assert(!i || ((((size_t) dst) & 15) == 0));
312 
313  /* Make sure src is aligned too. */
314  if ((((size_t) src) & 15) == 0) {
315  /* Aligned! Do SSE blocks as long as we have 16 bytes available. */
316  const __m128i *mmsrc = (const __m128i *) src;
317  const __m128i zero = _mm_setzero_si128();
318  const __m128 divby128 = _mm_set1_ps(DIVBY128);
319  while (i >= 16) { /* 16 * 8-bit */
320  const __m128i bytes = _mm_load_si128(mmsrc); /* get 16 sint8 into an XMM register. */
321  /* treat as int16, shift left to clear every other sint16, then back right with sign-extend. Now sint16. */
322  const __m128i shorts1 = _mm_srai_epi16(_mm_slli_epi16(bytes, 8), 8);
323  /* right-shift-sign-extend gets us sint16 with the other set of values. */
324  const __m128i shorts2 = _mm_srai_epi16(bytes, 8);
325  /* unpack against zero to make these int32, shift to make them sign-extend, convert to float, multiply. Whew! */
326  const __m128 floats1 = _mm_mul_ps(_mm_cvtepi32_ps(_mm_srai_epi32(_mm_slli_epi32(_mm_unpacklo_epi16(shorts1, zero), 16), 16)), divby128);
327  const __m128 floats2 = _mm_mul_ps(_mm_cvtepi32_ps(_mm_srai_epi32(_mm_slli_epi32(_mm_unpacklo_epi16(shorts2, zero), 16), 16)), divby128);
328  const __m128 floats3 = _mm_mul_ps(_mm_cvtepi32_ps(_mm_srai_epi32(_mm_slli_epi32(_mm_unpackhi_epi16(shorts1, zero), 16), 16)), divby128);
329  const __m128 floats4 = _mm_mul_ps(_mm_cvtepi32_ps(_mm_srai_epi32(_mm_slli_epi32(_mm_unpackhi_epi16(shorts2, zero), 16), 16)), divby128);
330  /* Interleave back into correct order, store. */
331  _mm_store_ps(dst, _mm_unpacklo_ps(floats1, floats2));
332  _mm_store_ps(dst+4, _mm_unpackhi_ps(floats1, floats2));
333  _mm_store_ps(dst+8, _mm_unpacklo_ps(floats3, floats4));
334  _mm_store_ps(dst+12, _mm_unpackhi_ps(floats3, floats4));
335  i -= 16; mmsrc--; dst -= 16;
336  }
337 
338  src = (const Sint8 *) mmsrc;
339  }
340 
341  src += 15; dst += 15; /* adjust for any scalar finishing. */
342 
343  /* Finish off any leftovers with scalar operations. */
344  while (i) {
345  *dst = ((float) *src) * DIVBY128;
346  i--; src--; dst--;
347  }
348 
349  cvt->len_cvt *= 4;
350  if (cvt->filters[++cvt->filter_index]) {
351  cvt->filters[cvt->filter_index](cvt, AUDIO_F32SYS);
352  }
353 }
354 
355 static void SDLCALL
356 SDL_Convert_U8_to_F32_SSE2(SDL_AudioCVT *cvt, SDL_AudioFormat format)
357 {
358  const Uint8 *src = ((const Uint8 *) (cvt->buf + cvt->len_cvt)) - 1;
359  float *dst = ((float *) (cvt->buf + cvt->len_cvt * 4)) - 1;
360  int i;
361 
362  LOG_DEBUG_CONVERT("AUDIO_U8", "AUDIO_F32 (using SSE2)");
363 
364  /* Get dst aligned to 16 bytes (since buffer is growing, we don't have to worry about overreading from src) */
365  for (i = cvt->len_cvt; i && (((size_t) (dst-15)) & 15); --i, --src, --dst) {
366  *dst = (((float) *src) * DIVBY128) - 1.0f;
367  }
368 
369  src -= 15; dst -= 15; /* adjust to read SSE blocks from the start. */
370  SDL_assert(!i || ((((size_t) dst) & 15) == 0));
371 
372  /* Make sure src is aligned too. */
373  if ((((size_t) src) & 15) == 0) {
374  /* Aligned! Do SSE blocks as long as we have 16 bytes available. */
375  const __m128i *mmsrc = (const __m128i *) src;
376  const __m128i zero = _mm_setzero_si128();
377  const __m128 divby128 = _mm_set1_ps(DIVBY128);
378  const __m128 minus1 = _mm_set1_ps(-1.0f);
379  while (i >= 16) { /* 16 * 8-bit */
380  const __m128i bytes = _mm_load_si128(mmsrc); /* get 16 uint8 into an XMM register. */
381  /* treat as int16, shift left to clear every other sint16, then back right with zero-extend. Now uint16. */
382  const __m128i shorts1 = _mm_srli_epi16(_mm_slli_epi16(bytes, 8), 8);
383  /* right-shift-zero-extend gets us uint16 with the other set of values. */
384  const __m128i shorts2 = _mm_srli_epi16(bytes, 8);
385  /* unpack against zero to make these int32, convert to float, multiply, add. Whew! */
386  /* Note that AVX2 can do floating point multiply+add in one instruction, fwiw. SSE2 cannot. */
387  const __m128 floats1 = _mm_add_ps(_mm_mul_ps(_mm_cvtepi32_ps(_mm_unpacklo_epi16(shorts1, zero)), divby128), minus1);
388  const __m128 floats2 = _mm_add_ps(_mm_mul_ps(_mm_cvtepi32_ps(_mm_unpacklo_epi16(shorts2, zero)), divby128), minus1);
389  const __m128 floats3 = _mm_add_ps(_mm_mul_ps(_mm_cvtepi32_ps(_mm_unpackhi_epi16(shorts1, zero)), divby128), minus1);
390  const __m128 floats4 = _mm_add_ps(_mm_mul_ps(_mm_cvtepi32_ps(_mm_unpackhi_epi16(shorts2, zero)), divby128), minus1);
391  /* Interleave back into correct order, store. */
392  _mm_store_ps(dst, _mm_unpacklo_ps(floats1, floats2));
393  _mm_store_ps(dst+4, _mm_unpackhi_ps(floats1, floats2));
394  _mm_store_ps(dst+8, _mm_unpacklo_ps(floats3, floats4));
395  _mm_store_ps(dst+12, _mm_unpackhi_ps(floats3, floats4));
396  i -= 16; mmsrc--; dst -= 16;
397  }
398 
399  src = (const Uint8 *) mmsrc;
400  }
401 
402  src += 15; dst += 15; /* adjust for any scalar finishing. */
403 
404  /* Finish off any leftovers with scalar operations. */
405  while (i) {
406  *dst = (((float) *src) * DIVBY128) - 1.0f;
407  i--; src--; dst--;
408  }
409 
410  cvt->len_cvt *= 4;
411  if (cvt->filters[++cvt->filter_index]) {
412  cvt->filters[cvt->filter_index](cvt, AUDIO_F32SYS);
413  }
414 }
415 
416 static void SDLCALL
417 SDL_Convert_S16_to_F32_SSE2(SDL_AudioCVT *cvt, SDL_AudioFormat format)
418 {
419  const Sint16 *src = ((const Sint16 *) (cvt->buf + cvt->len_cvt)) - 1;
420  float *dst = ((float *) (cvt->buf + cvt->len_cvt * 2)) - 1;
421  int i;
422 
423  LOG_DEBUG_CONVERT("AUDIO_S16", "AUDIO_F32 (using SSE2)");
424 
425  /* Get dst aligned to 16 bytes (since buffer is growing, we don't have to worry about overreading from src) */
426  for (i = cvt->len_cvt / sizeof (Sint16); i && (((size_t) (dst-7)) & 15); --i, --src, --dst) {
427  *dst = ((float) *src) * DIVBY32768;
428  }
429 
430  src -= 7; dst -= 7; /* adjust to read SSE blocks from the start. */
431  SDL_assert(!i || ((((size_t) dst) & 15) == 0));
432 
433  /* Make sure src is aligned too. */
434  if ((((size_t) src) & 15) == 0) {
435  /* Aligned! Do SSE blocks as long as we have 16 bytes available. */
436  const __m128 divby32768 = _mm_set1_ps(DIVBY32768);
437  while (i >= 8) { /* 8 * 16-bit */
438  const __m128i ints = _mm_load_si128((__m128i const *) src); /* get 8 sint16 into an XMM register. */
439  /* treat as int32, shift left to clear every other sint16, then back right with sign-extend. Now sint32. */
440  const __m128i a = _mm_srai_epi32(_mm_slli_epi32(ints, 16), 16);
441  /* right-shift-sign-extend gets us sint32 with the other set of values. */
442  const __m128i b = _mm_srai_epi32(ints, 16);
443  /* Interleave these back into the right order, convert to float, multiply, store. */
444  _mm_store_ps(dst, _mm_mul_ps(_mm_cvtepi32_ps(_mm_unpacklo_epi32(a, b)), divby32768));
445  _mm_store_ps(dst+4, _mm_mul_ps(_mm_cvtepi32_ps(_mm_unpackhi_epi32(a, b)), divby32768));
446  i -= 8; src -= 8; dst -= 8;
447  }
448  }
449 
450  src += 7; dst += 7; /* adjust for any scalar finishing. */
451 
452  /* Finish off any leftovers with scalar operations. */
453  while (i) {
454  *dst = ((float) *src) * DIVBY32768;
455  i--; src--; dst--;
456  }
457 
458  cvt->len_cvt *= 2;
459  if (cvt->filters[++cvt->filter_index]) {
460  cvt->filters[cvt->filter_index](cvt, AUDIO_F32SYS);
461  }
462 }
463 
464 static void SDLCALL
465 SDL_Convert_U16_to_F32_SSE2(SDL_AudioCVT *cvt, SDL_AudioFormat format)
466 {
467  const Uint16 *src = ((const Uint16 *) (cvt->buf + cvt->len_cvt)) - 1;
468  float *dst = ((float *) (cvt->buf + cvt->len_cvt * 2)) - 1;
469  int i;
470 
471  LOG_DEBUG_CONVERT("AUDIO_U16", "AUDIO_F32 (using SSE2)");
472 
473  /* Get dst aligned to 16 bytes (since buffer is growing, we don't have to worry about overreading from src) */
474  for (i = cvt->len_cvt / sizeof (Sint16); i && (((size_t) (dst-7)) & 15); --i, --src, --dst) {
475  *dst = (((float) *src) * DIVBY32768) - 1.0f;
476  }
477 
478  src -= 7; dst -= 7; /* adjust to read SSE blocks from the start. */
479  SDL_assert(!i || ((((size_t) dst) & 15) == 0));
480 
481  /* Make sure src is aligned too. */
482  if ((((size_t) src) & 15) == 0) {
483  /* Aligned! Do SSE blocks as long as we have 16 bytes available. */
484  const __m128 divby32768 = _mm_set1_ps(DIVBY32768);
485  const __m128 minus1 = _mm_set1_ps(1.0f);
486  while (i >= 8) { /* 8 * 16-bit */
487  const __m128i ints = _mm_load_si128((__m128i const *) src); /* get 8 sint16 into an XMM register. */
488  /* treat as int32, shift left to clear every other sint16, then back right with zero-extend. Now sint32. */
489  const __m128i a = _mm_srli_epi32(_mm_slli_epi32(ints, 16), 16);
490  /* right-shift-sign-extend gets us sint32 with the other set of values. */
491  const __m128i b = _mm_srli_epi32(ints, 16);
492  /* Interleave these back into the right order, convert to float, multiply, store. */
493  _mm_store_ps(dst, _mm_add_ps(_mm_mul_ps(_mm_cvtepi32_ps(_mm_unpacklo_epi32(a, b)), divby32768), minus1));
494  _mm_store_ps(dst+4, _mm_add_ps(_mm_mul_ps(_mm_cvtepi32_ps(_mm_unpackhi_epi32(a, b)), divby32768), minus1));
495  i -= 8; src -= 8; dst -= 8;
496  }
497  }
498 
499  src += 7; dst += 7; /* adjust for any scalar finishing. */
500 
501  /* Finish off any leftovers with scalar operations. */
502  while (i) {
503  *dst = (((float) *src) * DIVBY32768) - 1.0f;
504  i--; src--; dst--;
505  }
506 
507  cvt->len_cvt *= 2;
508  if (cvt->filters[++cvt->filter_index]) {
509  cvt->filters[cvt->filter_index](cvt, AUDIO_F32SYS);
510  }
511 }
512 
513 static void SDLCALL
514 SDL_Convert_S32_to_F32_SSE2(SDL_AudioCVT *cvt, SDL_AudioFormat format)
515 {
516  const Sint32 *src = (const Sint32 *) cvt->buf;
517  float *dst = (float *) cvt->buf;
518  int i;
519 
520  LOG_DEBUG_CONVERT("AUDIO_S32", "AUDIO_F32 (using SSE2)");
521 
522  /* Get dst aligned to 16 bytes */
523  for (i = cvt->len_cvt / sizeof (Sint32); i && (((size_t) dst) & 15); --i, ++src, ++dst) {
524  *dst = ((float) (*src>>8)) * DIVBY8388607;
525  }
526 
527  SDL_assert(!i || ((((size_t) dst) & 15) == 0));
528 
529  /* Make sure src is aligned too. */
530  if ((((size_t) src) & 15) == 0) {
531  /* Aligned! Do SSE blocks as long as we have 16 bytes available. */
532  const __m128 divby8388607 = _mm_set1_ps(DIVBY8388607);
533  const __m128i *mmsrc = (const __m128i *) src;
534  while (i >= 4) { /* 4 * sint32 */
535  /* shift out lowest bits so int fits in a float32. Small precision loss, but much faster. */
536  _mm_store_ps(dst, _mm_mul_ps(_mm_cvtepi32_ps(_mm_srai_epi32(_mm_load_si128(mmsrc), 8)), divby8388607));
537  i -= 4; mmsrc++; dst += 4;
538  }
539  src = (const Sint32 *) mmsrc;
540  }
541 
542  /* Finish off any leftovers with scalar operations. */
543  while (i) {
544  *dst = ((float) (*src>>8)) * DIVBY8388607;
545  i--; src++; dst++;
546  }
547 
548  if (cvt->filters[++cvt->filter_index]) {
549  cvt->filters[cvt->filter_index](cvt, AUDIO_F32SYS);
550  }
551 }
552 
553 static void SDLCALL
554 SDL_Convert_F32_to_S8_SSE2(SDL_AudioCVT *cvt, SDL_AudioFormat format)
555 {
556  const float *src = (const float *) cvt->buf;
557  Sint8 *dst = (Sint8 *) cvt->buf;
558  int i;
559 
560  LOG_DEBUG_CONVERT("AUDIO_F32", "AUDIO_S8 (using SSE2)");
561 
562  /* Get dst aligned to 16 bytes */
563  for (i = cvt->len_cvt / sizeof (float); i && (((size_t) dst) & 15); --i, ++src, ++dst) {
564  const float sample = *src;
565  if (sample >= 1.0f) {
566  *dst = 127;
567  } else if (sample <= -1.0f) {
568  *dst = -128;
569  } else {
570  *dst = (Sint8)(sample * 127.0f);
571  }
572  }
573 
574  SDL_assert(!i || ((((size_t) dst) & 15) == 0));
575 
576  /* Make sure src is aligned too. */
577  if ((((size_t) src) & 15) == 0) {
578  /* Aligned! Do SSE blocks as long as we have 16 bytes available. */
579  const __m128 one = _mm_set1_ps(1.0f);
580  const __m128 negone = _mm_set1_ps(-1.0f);
581  const __m128 mulby127 = _mm_set1_ps(127.0f);
582  __m128i *mmdst = (__m128i *) dst;
583  while (i >= 16) { /* 16 * float32 */
584  const __m128i ints1 = _mm_cvtps_epi32(_mm_mul_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(src)), one), mulby127)); /* load 4 floats, clamp, convert to sint32 */
585  const __m128i ints2 = _mm_cvtps_epi32(_mm_mul_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(src+4)), one), mulby127)); /* load 4 floats, clamp, convert to sint32 */
586  const __m128i ints3 = _mm_cvtps_epi32(_mm_mul_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(src+8)), one), mulby127)); /* load 4 floats, clamp, convert to sint32 */
587  const __m128i ints4 = _mm_cvtps_epi32(_mm_mul_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(src+12)), one), mulby127)); /* load 4 floats, clamp, convert to sint32 */
588  _mm_store_si128(mmdst, _mm_packs_epi16(_mm_packs_epi32(ints1, ints2), _mm_packs_epi32(ints3, ints4))); /* pack down, store out. */
589  i -= 16; src += 16; mmdst++;
590  }
591  dst = (Sint8 *) mmdst;
592  }
593 
594  /* Finish off any leftovers with scalar operations. */
595  while (i) {
596  const float sample = *src;
597  if (sample >= 1.0f) {
598  *dst = 127;
599  } else if (sample <= -1.0f) {
600  *dst = -128;
601  } else {
602  *dst = (Sint8)(sample * 127.0f);
603  }
604  i--; src++; dst++;
605  }
606 
607  cvt->len_cvt /= 4;
608  if (cvt->filters[++cvt->filter_index]) {
609  cvt->filters[cvt->filter_index](cvt, AUDIO_S8);
610  }
611 }
612 
613 static void SDLCALL
614 SDL_Convert_F32_to_U8_SSE2(SDL_AudioCVT *cvt, SDL_AudioFormat format)
615 {
616  const float *src = (const float *) cvt->buf;
617  Uint8 *dst = (Uint8 *) cvt->buf;
618  int i;
619 
620  LOG_DEBUG_CONVERT("AUDIO_F32", "AUDIO_U8 (using SSE2)");
621 
622  /* Get dst aligned to 16 bytes */
623  for (i = cvt->len_cvt / sizeof (float); i && (((size_t) dst) & 15); --i, ++src, ++dst) {
624  const float sample = *src;
625  if (sample >= 1.0f) {
626  *dst = 255;
627  } else if (sample <= -1.0f) {
628  *dst = 0;
629  } else {
630  *dst = (Uint8)((sample + 1.0f) * 127.0f);
631  }
632  }
633 
634  SDL_assert(!i || ((((size_t) dst) & 15) == 0));
635 
636  /* Make sure src is aligned too. */
637  if ((((size_t) src) & 15) == 0) {
638  /* Aligned! Do SSE blocks as long as we have 16 bytes available. */
639  const __m128 one = _mm_set1_ps(1.0f);
640  const __m128 negone = _mm_set1_ps(-1.0f);
641  const __m128 mulby127 = _mm_set1_ps(127.0f);
642  __m128i *mmdst = (__m128i *) dst;
643  while (i >= 16) { /* 16 * float32 */
644  const __m128i ints1 = _mm_cvtps_epi32(_mm_mul_ps(_mm_add_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(src)), one), one), mulby127)); /* load 4 floats, clamp, convert to sint32 */
645  const __m128i ints2 = _mm_cvtps_epi32(_mm_mul_ps(_mm_add_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(src+4)), one), one), mulby127)); /* load 4 floats, clamp, convert to sint32 */
646  const __m128i ints3 = _mm_cvtps_epi32(_mm_mul_ps(_mm_add_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(src+8)), one), one), mulby127)); /* load 4 floats, clamp, convert to sint32 */
647  const __m128i ints4 = _mm_cvtps_epi32(_mm_mul_ps(_mm_add_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(src+12)), one), one), mulby127)); /* load 4 floats, clamp, convert to sint32 */
648  _mm_store_si128(mmdst, _mm_packus_epi16(_mm_packs_epi32(ints1, ints2), _mm_packs_epi32(ints3, ints4))); /* pack down, store out. */
649  i -= 16; src += 16; mmdst++;
650  }
651  dst = (Uint8 *) mmdst;
652  }
653 
654  /* Finish off any leftovers with scalar operations. */
655  while (i) {
656  const float sample = *src;
657  if (sample >= 1.0f) {
658  *dst = 255;
659  } else if (sample <= -1.0f) {
660  *dst = 0;
661  } else {
662  *dst = (Uint8)((sample + 1.0f) * 127.0f);
663  }
664  i--; src++; dst++;
665  }
666 
667  cvt->len_cvt /= 4;
668  if (cvt->filters[++cvt->filter_index]) {
669  cvt->filters[cvt->filter_index](cvt, AUDIO_U8);
670  }
671 }
672 
673 static void SDLCALL
674 SDL_Convert_F32_to_S16_SSE2(SDL_AudioCVT *cvt, SDL_AudioFormat format)
675 {
676  const float *src = (const float *) cvt->buf;
677  Sint16 *dst = (Sint16 *) cvt->buf;
678  int i;
679 
680  LOG_DEBUG_CONVERT("AUDIO_F32", "AUDIO_S16 (using SSE2)");
681 
682  /* Get dst aligned to 16 bytes */
683  for (i = cvt->len_cvt / sizeof (float); i && (((size_t) dst) & 15); --i, ++src, ++dst) {
684  const float sample = *src;
685  if (sample >= 1.0f) {
686  *dst = 32767;
687  } else if (sample <= -1.0f) {
688  *dst = -32768;
689  } else {
690  *dst = (Sint16)(sample * 32767.0f);
691  }
692  }
693 
694  SDL_assert(!i || ((((size_t) dst) & 15) == 0));
695 
696  /* Make sure src is aligned too. */
697  if ((((size_t) src) & 15) == 0) {
698  /* Aligned! Do SSE blocks as long as we have 16 bytes available. */
699  const __m128 one = _mm_set1_ps(1.0f);
700  const __m128 negone = _mm_set1_ps(-1.0f);
701  const __m128 mulby32767 = _mm_set1_ps(32767.0f);
702  __m128i *mmdst = (__m128i *) dst;
703  while (i >= 8) { /* 8 * float32 */
704  const __m128i ints1 = _mm_cvtps_epi32(_mm_mul_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(src)), one), mulby32767)); /* load 4 floats, clamp, convert to sint32 */
705  const __m128i ints2 = _mm_cvtps_epi32(_mm_mul_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(src+4)), one), mulby32767)); /* load 4 floats, clamp, convert to sint32 */
706  _mm_store_si128(mmdst, _mm_packs_epi32(ints1, ints2)); /* pack to sint16, store out. */
707  i -= 8; src += 8; mmdst++;
708  }
709  dst = (Sint16 *) mmdst;
710  }
711 
712  /* Finish off any leftovers with scalar operations. */
713  while (i) {
714  const float sample = *src;
715  if (sample >= 1.0f) {
716  *dst = 32767;
717  } else if (sample <= -1.0f) {
718  *dst = -32768;
719  } else {
720  *dst = (Sint16)(sample * 32767.0f);
721  }
722  i--; src++; dst++;
723  }
724 
725  cvt->len_cvt /= 2;
726  if (cvt->filters[++cvt->filter_index]) {
727  cvt->filters[cvt->filter_index](cvt, AUDIO_S16SYS);
728  }
729 }
730 
731 static void SDLCALL
732 SDL_Convert_F32_to_U16_SSE2(SDL_AudioCVT *cvt, SDL_AudioFormat format)
733 {
734  const float *src = (const float *) cvt->buf;
735  Uint16 *dst = (Uint16 *) cvt->buf;
736  int i;
737 
738  LOG_DEBUG_CONVERT("AUDIO_F32", "AUDIO_U16 (using SSE2)");
739 
740  /* Get dst aligned to 16 bytes */
741  for (i = cvt->len_cvt / sizeof (float); i && (((size_t) dst) & 15); --i, ++src, ++dst) {
742  const float sample = *src;
743  if (sample >= 1.0f) {
744  *dst = 65535;
745  } else if (sample <= -1.0f) {
746  *dst = 0;
747  } else {
748  *dst = (Uint16)((sample + 1.0f) * 32767.0f);
749  }
750  }
751 
752  SDL_assert(!i || ((((size_t) dst) & 15) == 0));
753 
754  /* Make sure src is aligned too. */
755  if ((((size_t) src) & 15) == 0) {
756  /* Aligned! Do SSE blocks as long as we have 16 bytes available. */
757  /* This calculates differently than the scalar path because SSE2 can't
758  pack int32 data down to unsigned int16. _mm_packs_epi32 does signed
759  saturation, so that would corrupt our data. _mm_packus_epi32 exists,
760  but not before SSE 4.1. So we convert from float to sint16, packing
761  that down with legit signed saturation, and then xor the top bit
762  against 1. This results in the correct unsigned 16-bit value, even
763  though it looks like dark magic. */
764  const __m128 mulby32767 = _mm_set1_ps(32767.0f);
765  const __m128i topbit = _mm_set1_epi16(-32768);
766  const __m128 one = _mm_set1_ps(1.0f);
767  const __m128 negone = _mm_set1_ps(-1.0f);
768  __m128i *mmdst = (__m128i *) dst;
769  while (i >= 8) { /* 8 * float32 */
770  const __m128i ints1 = _mm_cvtps_epi32(_mm_mul_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(src)), one), mulby32767)); /* load 4 floats, clamp, convert to sint32 */
771  const __m128i ints2 = _mm_cvtps_epi32(_mm_mul_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(src+4)), one), mulby32767)); /* load 4 floats, clamp, convert to sint32 */
772  _mm_store_si128(mmdst, _mm_xor_si128(_mm_packs_epi32(ints1, ints2), topbit)); /* pack to sint16, xor top bit, store out. */
773  i -= 8; src += 8; mmdst++;
774  }
775  dst = (Uint16 *) mmdst;
776  }
777 
778  /* Finish off any leftovers with scalar operations. */
779  while (i) {
780  const float sample = *src;
781  if (sample >= 1.0f) {
782  *dst = 65535;
783  } else if (sample <= -1.0f) {
784  *dst = 0;
785  } else {
786  *dst = (Uint16)((sample + 1.0f) * 32767.0f);
787  }
788  i--; src++; dst++;
789  }
790 
791  cvt->len_cvt /= 2;
792  if (cvt->filters[++cvt->filter_index]) {
793  cvt->filters[cvt->filter_index](cvt, AUDIO_U16SYS);
794  }
795 }
796 
797 static void SDLCALL
798 SDL_Convert_F32_to_S32_SSE2(SDL_AudioCVT *cvt, SDL_AudioFormat format)
799 {
800  const float *src = (const float *) cvt->buf;
801  Sint32 *dst = (Sint32 *) cvt->buf;
802  int i;
803 
804  LOG_DEBUG_CONVERT("AUDIO_F32", "AUDIO_S32 (using SSE2)");
805 
806  /* Get dst aligned to 16 bytes */
807  for (i = cvt->len_cvt / sizeof (float); i && (((size_t) dst) & 15); --i, ++src, ++dst) {
808  const float sample = *src;
809  if (sample >= 1.0f) {
810  *dst = 2147483647;
811  } else if (sample <= -1.0f) {
812  *dst = (Sint32) -2147483648LL;
813  } else {
814  *dst = ((Sint32)(sample * 8388607.0f)) << 8;
815  }
816  }
817 
818  SDL_assert(!i || ((((size_t) dst) & 15) == 0));
819  SDL_assert(!i || ((((size_t) src) & 15) == 0));
820 
821  {
822  /* Aligned! Do SSE blocks as long as we have 16 bytes available. */
823  const __m128 one = _mm_set1_ps(1.0f);
824  const __m128 negone = _mm_set1_ps(-1.0f);
825  const __m128 mulby8388607 = _mm_set1_ps(8388607.0f);
826  __m128i *mmdst = (__m128i *) dst;
827  while (i >= 4) { /* 4 * float32 */
828  _mm_store_si128(mmdst, _mm_slli_epi32(_mm_cvtps_epi32(_mm_mul_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(src)), one), mulby8388607)), 8)); /* load 4 floats, clamp, convert to sint32 */
829  i -= 4; src += 4; mmdst++;
830  }
831  dst = (Sint32 *) mmdst;
832  }
833 
834  /* Finish off any leftovers with scalar operations. */
835  while (i) {
836  const float sample = *src;
837  if (sample >= 1.0f) {
838  *dst = 2147483647;
839  } else if (sample <= -1.0f) {
840  *dst = (Sint32) -2147483648LL;
841  } else {
842  *dst = ((Sint32)(sample * 8388607.0f)) << 8;
843  }
844  i--; src++; dst++;
845  }
846 
847  if (cvt->filters[++cvt->filter_index]) {
848  cvt->filters[cvt->filter_index](cvt, AUDIO_S32SYS);
849  }
850 }
851 #endif
852 
853 
854 #if HAVE_NEON_INTRINSICS
855 static void SDLCALL
856 SDL_Convert_S8_to_F32_NEON(SDL_AudioCVT *cvt, SDL_AudioFormat format)
857 {
858  const Sint8 *src = ((const Sint8 *) (cvt->buf + cvt->len_cvt)) - 1;
859  float *dst = ((float *) (cvt->buf + cvt->len_cvt * 4)) - 1;
860  int i;
861 
862  LOG_DEBUG_CONVERT("AUDIO_S8", "AUDIO_F32 (using NEON)");
863 
864  /* Get dst aligned to 16 bytes (since buffer is growing, we don't have to worry about overreading from src) */
865  for (i = cvt->len_cvt; i && (((size_t) (dst-15)) & 15); --i, --src, --dst) {
866  *dst = ((float) *src) * DIVBY128;
867  }
868 
869  src -= 15; dst -= 15; /* adjust to read NEON blocks from the start. */
870  SDL_assert(!i || ((((size_t) dst) & 15) == 0));
871 
872  /* Make sure src is aligned too. */
873  if ((((size_t) src) & 15) == 0) {
874  /* Aligned! Do NEON blocks as long as we have 16 bytes available. */
875  const int8_t *mmsrc = (const int8_t *) src;
876  const float32x4_t divby128 = vdupq_n_f32(DIVBY128);
877  while (i >= 16) { /* 16 * 8-bit */
878  const int8x16_t bytes = vld1q_s8(mmsrc); /* get 16 sint8 into a NEON register. */
879  const int16x8_t int16hi = vmovl_s8(vget_high_s8(bytes)); /* convert top 8 bytes to 8 int16 */
880  const int16x8_t int16lo = vmovl_s8(vget_low_s8(bytes)); /* convert bottom 8 bytes to 8 int16 */
881  /* split int16 to two int32, then convert to float, then multiply to normalize, store. */
882  vst1q_f32(dst, vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_high_s16(int16hi))), divby128));
883  vst1q_f32(dst+4, vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_low_s16(int16hi))), divby128));
884  vst1q_f32(dst+8, vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_high_s16(int16lo))), divby128));
885  vst1q_f32(dst+12, vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_low_s16(int16lo))), divby128));
886  i -= 16; mmsrc -= 16; dst -= 16;
887  }
888 
889  src = (const Sint8 *) mmsrc;
890  }
891 
892  src += 15; dst += 15; /* adjust for any scalar finishing. */
893 
894  /* Finish off any leftovers with scalar operations. */
895  while (i) {
896  *dst = ((float) *src) * DIVBY128;
897  i--; src--; dst--;
898  }
899 
900  cvt->len_cvt *= 4;
901  if (cvt->filters[++cvt->filter_index]) {
902  cvt->filters[cvt->filter_index](cvt, AUDIO_F32SYS);
903  }
904 }
905 
906 static void SDLCALL
907 SDL_Convert_U8_to_F32_NEON(SDL_AudioCVT *cvt, SDL_AudioFormat format)
908 {
909  const Uint8 *src = ((const Uint8 *) (cvt->buf + cvt->len_cvt)) - 1;
910  float *dst = ((float *) (cvt->buf + cvt->len_cvt * 4)) - 1;
911  int i;
912 
913  LOG_DEBUG_CONVERT("AUDIO_U8", "AUDIO_F32 (using NEON)");
914 
915  /* Get dst aligned to 16 bytes (since buffer is growing, we don't have to worry about overreading from src) */
916  for (i = cvt->len_cvt; i && (((size_t) (dst-15)) & 15); --i, --src, --dst) {
917  *dst = (((float) *src) * DIVBY128) - 1.0f;
918  }
919 
920  src -= 15; dst -= 15; /* adjust to read NEON blocks from the start. */
921  SDL_assert(!i || ((((size_t) dst) & 15) == 0));
922 
923  /* Make sure src is aligned too. */
924  if ((((size_t) src) & 15) == 0) {
925  /* Aligned! Do NEON blocks as long as we have 16 bytes available. */
926  const uint8_t *mmsrc = (const uint8_t *) src;
927  const float32x4_t divby128 = vdupq_n_f32(DIVBY128);
928  const float32x4_t negone = vdupq_n_f32(-1.0f);
929  while (i >= 16) { /* 16 * 8-bit */
930  const uint8x16_t bytes = vld1q_u8(mmsrc); /* get 16 uint8 into a NEON register. */
931  const uint16x8_t uint16hi = vmovl_u8(vget_high_u8(bytes)); /* convert top 8 bytes to 8 uint16 */
932  const uint16x8_t uint16lo = vmovl_u8(vget_low_u8(bytes)); /* convert bottom 8 bytes to 8 uint16 */
933  /* split uint16 to two uint32, then convert to float, then multiply to normalize, subtract to adjust for sign, store. */
934  vst1q_f32(dst, vmlaq_f32(negone, vcvtq_f32_u32(vmovl_u16(vget_high_u16(uint16hi))), divby128));
935  vst1q_f32(dst+4, vmlaq_f32(negone, vcvtq_f32_u32(vmovl_u16(vget_low_u16(uint16hi))), divby128));
936  vst1q_f32(dst+8, vmlaq_f32(negone, vcvtq_f32_u32(vmovl_u16(vget_high_u16(uint16lo))), divby128));
937  vst1q_f32(dst+12, vmlaq_f32(negone, vcvtq_f32_u32(vmovl_u16(vget_low_u16(uint16lo))), divby128));
938  i -= 16; mmsrc -= 16; dst -= 16;
939  }
940 
941  src = (const Uint8 *) mmsrc;
942  }
943 
944  src += 15; dst += 15; /* adjust for any scalar finishing. */
945 
946  /* Finish off any leftovers with scalar operations. */
947  while (i) {
948  *dst = (((float) *src) * DIVBY128) - 1.0f;
949  i--; src--; dst--;
950  }
951 
952  cvt->len_cvt *= 4;
953  if (cvt->filters[++cvt->filter_index]) {
954  cvt->filters[cvt->filter_index](cvt, AUDIO_F32SYS);
955  }
956 }
957 
958 static void SDLCALL
959 SDL_Convert_S16_to_F32_NEON(SDL_AudioCVT *cvt, SDL_AudioFormat format)
960 {
961  const Sint16 *src = ((const Sint16 *) (cvt->buf + cvt->len_cvt)) - 1;
962  float *dst = ((float *) (cvt->buf + cvt->len_cvt * 2)) - 1;
963  int i;
964 
965  LOG_DEBUG_CONVERT("AUDIO_S16", "AUDIO_F32 (using NEON)");
966 
967  /* Get dst aligned to 16 bytes (since buffer is growing, we don't have to worry about overreading from src) */
968  for (i = cvt->len_cvt / sizeof (Sint16); i && (((size_t) (dst-7)) & 15); --i, --src, --dst) {
969  *dst = ((float) *src) * DIVBY32768;
970  }
971 
972  src -= 7; dst -= 7; /* adjust to read NEON blocks from the start. */
973  SDL_assert(!i || ((((size_t) dst) & 15) == 0));
974 
975  /* Make sure src is aligned too. */
976  if ((((size_t) src) & 15) == 0) {
977  /* Aligned! Do NEON blocks as long as we have 16 bytes available. */
978  const float32x4_t divby32768 = vdupq_n_f32(DIVBY32768);
979  while (i >= 8) { /* 8 * 16-bit */
980  const int16x8_t ints = vld1q_s16((int16_t const *) src); /* get 8 sint16 into a NEON register. */
981  /* split int16 to two int32, then convert to float, then multiply to normalize, store. */
982  vst1q_f32(dst, vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_low_s16(ints))), divby32768));
983  vst1q_f32(dst+4, vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_high_s16(ints))), divby32768));
984  i -= 8; src -= 8; dst -= 8;
985  }
986  }
987 
988  src += 7; dst += 7; /* adjust for any scalar finishing. */
989 
990  /* Finish off any leftovers with scalar operations. */
991  while (i) {
992  *dst = ((float) *src) * DIVBY32768;
993  i--; src--; dst--;
994  }
995 
996  cvt->len_cvt *= 2;
997  if (cvt->filters[++cvt->filter_index]) {
998  cvt->filters[cvt->filter_index](cvt, AUDIO_F32SYS);
999  }
1000 }
1001 
1002 static void SDLCALL
1003 SDL_Convert_U16_to_F32_NEON(SDL_AudioCVT *cvt, SDL_AudioFormat format)
1004 {
1005  const Uint16 *src = ((const Uint16 *) (cvt->buf + cvt->len_cvt)) - 1;
1006  float *dst = ((float *) (cvt->buf + cvt->len_cvt * 2)) - 1;
1007  int i;
1008 
1009  LOG_DEBUG_CONVERT("AUDIO_U16", "AUDIO_F32 (using NEON)");
1010 
1011  /* Get dst aligned to 16 bytes (since buffer is growing, we don't have to worry about overreading from src) */
1012  for (i = cvt->len_cvt / sizeof (Sint16); i && (((size_t) (dst-7)) & 15); --i, --src, --dst) {
1013  *dst = (((float) *src) * DIVBY32768) - 1.0f;
1014  }
1015 
1016  src -= 7; dst -= 7; /* adjust to read NEON blocks from the start. */
1017  SDL_assert(!i || ((((size_t) dst) & 15) == 0));
1018 
1019  /* Make sure src is aligned too. */
1020  if ((((size_t) src) & 15) == 0) {
1021  /* Aligned! Do NEON blocks as long as we have 16 bytes available. */
1022  const float32x4_t divby32768 = vdupq_n_f32(DIVBY32768);
1023  const float32x4_t negone = vdupq_n_f32(-1.0f);
1024  while (i >= 8) { /* 8 * 16-bit */
1025  const uint16x8_t uints = vld1q_u16((uint16_t const *) src); /* get 8 uint16 into a NEON register. */
1026  /* split uint16 to two int32, then convert to float, then multiply to normalize, subtract for sign, store. */
1027  vst1q_f32(dst, vmlaq_f32(negone, vcvtq_f32_u32(vmovl_u16(vget_low_u16(uints))), divby32768));
1028  vst1q_f32(dst+4, vmlaq_f32(negone, vcvtq_f32_u32(vmovl_u16(vget_high_u16(uints))), divby32768));
1029  i -= 8; src -= 8; dst -= 8;
1030  }
1031  }
1032 
1033  src += 7; dst += 7; /* adjust for any scalar finishing. */
1034 
1035  /* Finish off any leftovers with scalar operations. */
1036  while (i) {
1037  *dst = (((float) *src) * DIVBY32768) - 1.0f;
1038  i--; src--; dst--;
1039  }
1040 
1041  cvt->len_cvt *= 2;
1042  if (cvt->filters[++cvt->filter_index]) {
1043  cvt->filters[cvt->filter_index](cvt, AUDIO_F32SYS);
1044  }
1045 }
1046 
1047 static void SDLCALL
1048 SDL_Convert_S32_to_F32_NEON(SDL_AudioCVT *cvt, SDL_AudioFormat format)
1049 {
1050  const Sint32 *src = (const Sint32 *) cvt->buf;
1051  float *dst = (float *) cvt->buf;
1052  int i;
1053 
1054  LOG_DEBUG_CONVERT("AUDIO_S32", "AUDIO_F32 (using NEON)");
1055 
1056  /* Get dst aligned to 16 bytes */
1057  for (i = cvt->len_cvt / sizeof (Sint32); i && (((size_t) dst) & 15); --i, ++src, ++dst) {
1058  *dst = ((float) (*src>>8)) * DIVBY8388607;
1059  }
1060 
1061  SDL_assert(!i || ((((size_t) dst) & 15) == 0));
1062 
1063  /* Make sure src is aligned too. */
1064  if ((((size_t) src) & 15) == 0) {
1065  /* Aligned! Do NEON blocks as long as we have 16 bytes available. */
1066  const float32x4_t divby8388607 = vdupq_n_f32(DIVBY8388607);
1067  const int32_t *mmsrc = (const int32_t *) src;
1068  while (i >= 4) { /* 4 * sint32 */
1069  /* shift out lowest bits so int fits in a float32. Small precision loss, but much faster. */
1070  vst1q_f32(dst, vmulq_f32(vcvtq_f32_s32(vshrq_n_s32(vld1q_s32(mmsrc), 8)), divby8388607));
1071  i -= 4; mmsrc += 4; dst += 4;
1072  }
1073  src = (const Sint32 *) mmsrc;
1074  }
1075 
1076  /* Finish off any leftovers with scalar operations. */
1077  while (i) {
1078  *dst = ((float) (*src>>8)) * DIVBY8388607;
1079  i--; src++; dst++;
1080  }
1081 
1082  if (cvt->filters[++cvt->filter_index]) {
1083  cvt->filters[cvt->filter_index](cvt, AUDIO_F32SYS);
1084  }
1085 }
1086 
1087 static void SDLCALL
1088 SDL_Convert_F32_to_S8_NEON(SDL_AudioCVT *cvt, SDL_AudioFormat format)
1089 {
1090  const float *src = (const float *) cvt->buf;
1091  Sint8 *dst = (Sint8 *) cvt->buf;
1092  int i;
1093 
1094  LOG_DEBUG_CONVERT("AUDIO_F32", "AUDIO_S8 (using NEON)");
1095 
1096  /* Get dst aligned to 16 bytes */
1097  for (i = cvt->len_cvt / sizeof (float); i && (((size_t) dst) & 15); --i, ++src, ++dst) {
1098  const float sample = *src;
1099  if (sample >= 1.0f) {
1100  *dst = 127;
1101  } else if (sample <= -1.0f) {
1102  *dst = -128;
1103  } else {
1104  *dst = (Sint8)(sample * 127.0f);
1105  }
1106  }
1107 
1108  SDL_assert(!i || ((((size_t) dst) & 15) == 0));
1109 
1110  /* Make sure src is aligned too. */
1111  if ((((size_t) src) & 15) == 0) {
1112  /* Aligned! Do NEON blocks as long as we have 16 bytes available. */
1113  const float32x4_t one = vdupq_n_f32(1.0f);
1114  const float32x4_t negone = vdupq_n_f32(-1.0f);
1115  const float32x4_t mulby127 = vdupq_n_f32(127.0f);
1116  int8_t *mmdst = (int8_t *) dst;
1117  while (i >= 16) { /* 16 * float32 */
1118  const int32x4_t ints1 = vcvtq_s32_f32(vmulq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(src)), one), mulby127)); /* load 4 floats, clamp, convert to sint32 */
1119  const int32x4_t ints2 = vcvtq_s32_f32(vmulq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(src+4)), one), mulby127)); /* load 4 floats, clamp, convert to sint32 */
1120  const int32x4_t ints3 = vcvtq_s32_f32(vmulq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(src+8)), one), mulby127)); /* load 4 floats, clamp, convert to sint32 */
1121  const int32x4_t ints4 = vcvtq_s32_f32(vmulq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(src+12)), one), mulby127)); /* load 4 floats, clamp, convert to sint32 */
1122  const int8x8_t i8lo = vmovn_s16(vcombine_s16(vmovn_s32(ints1), vmovn_s32(ints2))); /* narrow to sint16, combine, narrow to sint8 */
1123  const int8x8_t i8hi = vmovn_s16(vcombine_s16(vmovn_s32(ints3), vmovn_s32(ints4))); /* narrow to sint16, combine, narrow to sint8 */
1124  vst1q_s8(mmdst, vcombine_s8(i8lo, i8hi)); /* combine to int8x16_t, store out */
1125  i -= 16; src += 16; mmdst += 16;
1126  }
1127  dst = (Sint8 *) mmdst;
1128  }
1129 
1130  /* Finish off any leftovers with scalar operations. */
1131  while (i) {
1132  const float sample = *src;
1133  if (sample >= 1.0f) {
1134  *dst = 127;
1135  } else if (sample <= -1.0f) {
1136  *dst = -128;
1137  } else {
1138  *dst = (Sint8)(sample * 127.0f);
1139  }
1140  i--; src++; dst++;
1141  }
1142 
1143  cvt->len_cvt /= 4;
1144  if (cvt->filters[++cvt->filter_index]) {
1145  cvt->filters[cvt->filter_index](cvt, AUDIO_S8);
1146  }
1147 }
1148 
1149 static void SDLCALL
1150 SDL_Convert_F32_to_U8_NEON(SDL_AudioCVT *cvt, SDL_AudioFormat format)
1151 {
1152  const float *src = (const float *) cvt->buf;
1153  Uint8 *dst = (Uint8 *) cvt->buf;
1154  int i;
1155 
1156  LOG_DEBUG_CONVERT("AUDIO_F32", "AUDIO_U8 (using NEON)");
1157 
1158  /* Get dst aligned to 16 bytes */
1159  for (i = cvt->len_cvt / sizeof (float); i && (((size_t) dst) & 15); --i, ++src, ++dst) {
1160  const float sample = *src;
1161  if (sample >= 1.0f) {
1162  *dst = 255;
1163  } else if (sample <= -1.0f) {
1164  *dst = 0;
1165  } else {
1166  *dst = (Uint8)((sample + 1.0f) * 127.0f);
1167  }
1168  }
1169 
1170  SDL_assert(!i || ((((size_t) dst) & 15) == 0));
1171 
1172  /* Make sure src is aligned too. */
1173  if ((((size_t) src) & 15) == 0) {
1174  /* Aligned! Do NEON blocks as long as we have 16 bytes available. */
1175  const float32x4_t one = vdupq_n_f32(1.0f);
1176  const float32x4_t negone = vdupq_n_f32(-1.0f);
1177  const float32x4_t mulby127 = vdupq_n_f32(127.0f);
1178  uint8_t *mmdst = (uint8_t *) dst;
1179  while (i >= 16) { /* 16 * float32 */
1180  const uint32x4_t uints1 = vcvtq_u32_f32(vmulq_f32(vaddq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(src)), one), one), mulby127)); /* load 4 floats, clamp, convert to uint32 */
1181  const uint32x4_t uints2 = vcvtq_u32_f32(vmulq_f32(vaddq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(src+4)), one), one), mulby127)); /* load 4 floats, clamp, convert to uint32 */
1182  const uint32x4_t uints3 = vcvtq_u32_f32(vmulq_f32(vaddq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(src+8)), one), one), mulby127)); /* load 4 floats, clamp, convert to uint32 */
1183  const uint32x4_t uints4 = vcvtq_u32_f32(vmulq_f32(vaddq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(src+12)), one), one), mulby127)); /* load 4 floats, clamp, convert to uint32 */
1184  const uint8x8_t ui8lo = vmovn_u16(vcombine_u16(vmovn_u32(uints1), vmovn_u32(uints2))); /* narrow to uint16, combine, narrow to uint8 */
1185  const uint8x8_t ui8hi = vmovn_u16(vcombine_u16(vmovn_u32(uints3), vmovn_u32(uints4))); /* narrow to uint16, combine, narrow to uint8 */
1186  vst1q_u8(mmdst, vcombine_u8(ui8lo, ui8hi)); /* combine to uint8x16_t, store out */
1187  i -= 16; src += 16; mmdst += 16;
1188  }
1189 
1190  dst = (Uint8 *) mmdst;
1191  }
1192 
1193  /* Finish off any leftovers with scalar operations. */
1194  while (i) {
1195  const float sample = *src;
1196  if (sample >= 1.0f) {
1197  *dst = 255;
1198  } else if (sample <= -1.0f) {
1199  *dst = 0;
1200  } else {
1201  *dst = (Uint8)((sample + 1.0f) * 127.0f);
1202  }
1203  i--; src++; dst++;
1204  }
1205 
1206  cvt->len_cvt /= 4;
1207  if (cvt->filters[++cvt->filter_index]) {
1208  cvt->filters[cvt->filter_index](cvt, AUDIO_U8);
1209  }
1210 }
1211 
1212 static void SDLCALL
1213 SDL_Convert_F32_to_S16_NEON(SDL_AudioCVT *cvt, SDL_AudioFormat format)
1214 {
1215  const float *src = (const float *) cvt->buf;
1216  Sint16 *dst = (Sint16 *) cvt->buf;
1217  int i;
1218 
1219  LOG_DEBUG_CONVERT("AUDIO_F32", "AUDIO_S16 (using NEON)");
1220 
1221  /* Get dst aligned to 16 bytes */
1222  for (i = cvt->len_cvt / sizeof (float); i && (((size_t) dst) & 15); --i, ++src, ++dst) {
1223  const float sample = *src;
1224  if (sample >= 1.0f) {
1225  *dst = 32767;
1226  } else if (sample <= -1.0f) {
1227  *dst = -32768;
1228  } else {
1229  *dst = (Sint16)(sample * 32767.0f);
1230  }
1231  }
1232 
1233  SDL_assert(!i || ((((size_t) dst) & 15) == 0));
1234 
1235  /* Make sure src is aligned too. */
1236  if ((((size_t) src) & 15) == 0) {
1237  /* Aligned! Do NEON blocks as long as we have 16 bytes available. */
1238  const float32x4_t one = vdupq_n_f32(1.0f);
1239  const float32x4_t negone = vdupq_n_f32(-1.0f);
1240  const float32x4_t mulby32767 = vdupq_n_f32(32767.0f);
1241  int16_t *mmdst = (int16_t *) dst;
1242  while (i >= 8) { /* 8 * float32 */
1243  const int32x4_t ints1 = vcvtq_s32_f32(vmulq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(src)), one), mulby32767)); /* load 4 floats, clamp, convert to sint32 */
1244  const int32x4_t ints2 = vcvtq_s32_f32(vmulq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(src+4)), one), mulby32767)); /* load 4 floats, clamp, convert to sint32 */
1245  vst1q_s16(mmdst, vcombine_s16(vmovn_s32(ints1), vmovn_s32(ints2))); /* narrow to sint16, combine, store out. */
1246  i -= 8; src += 8; mmdst += 8;
1247  }
1248  dst = (Sint16 *) mmdst;
1249  }
1250 
1251  /* Finish off any leftovers with scalar operations. */
1252  while (i) {
1253  const float sample = *src;
1254  if (sample >= 1.0f) {
1255  *dst = 32767;
1256  } else if (sample <= -1.0f) {
1257  *dst = -32768;
1258  } else {
1259  *dst = (Sint16)(sample * 32767.0f);
1260  }
1261  i--; src++; dst++;
1262  }
1263 
1264  cvt->len_cvt /= 2;
1265  if (cvt->filters[++cvt->filter_index]) {
1266  cvt->filters[cvt->filter_index](cvt, AUDIO_S16SYS);
1267  }
1268 }
1269 
1270 static void SDLCALL
1271 SDL_Convert_F32_to_U16_NEON(SDL_AudioCVT *cvt, SDL_AudioFormat format)
1272 {
1273  const float *src = (const float *) cvt->buf;
1274  Uint16 *dst = (Uint16 *) cvt->buf;
1275  int i;
1276 
1277  LOG_DEBUG_CONVERT("AUDIO_F32", "AUDIO_U16 (using NEON)");
1278 
1279  /* Get dst aligned to 16 bytes */
1280  for (i = cvt->len_cvt / sizeof (float); i && (((size_t) dst) & 15); --i, ++src, ++dst) {
1281  const float sample = *src;
1282  if (sample >= 1.0f) {
1283  *dst = 65535;
1284  } else if (sample <= -1.0f) {
1285  *dst = 0;
1286  } else {
1287  *dst = (Uint16)((sample + 1.0f) * 32767.0f);
1288  }
1289  }
1290 
1291  SDL_assert(!i || ((((size_t) dst) & 15) == 0));
1292 
1293  /* Make sure src is aligned too. */
1294  if ((((size_t) src) & 15) == 0) {
1295  /* Aligned! Do NEON blocks as long as we have 16 bytes available. */
1296  const float32x4_t one = vdupq_n_f32(1.0f);
1297  const float32x4_t negone = vdupq_n_f32(-1.0f);
1298  const float32x4_t mulby32767 = vdupq_n_f32(32767.0f);
1299  uint16_t *mmdst = (uint16_t *) dst;
1300  while (i >= 8) { /* 8 * float32 */
1301  const uint32x4_t uints1 = vcvtq_u32_f32(vmulq_f32(vaddq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(src)), one), one), mulby32767)); /* load 4 floats, clamp, convert to uint32 */
1302  const uint32x4_t uints2 = vcvtq_u32_f32(vmulq_f32(vaddq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(src+4)), one), one), mulby32767)); /* load 4 floats, clamp, convert to uint32 */
1303  vst1q_u16(mmdst, vcombine_u16(vmovn_u32(uints1), vmovn_u32(uints2))); /* narrow to uint16, combine, store out. */
1304  i -= 8; src += 8; mmdst += 8;
1305  }
1306  dst = (Uint16 *) mmdst;
1307  }
1308 
1309  /* Finish off any leftovers with scalar operations. */
1310  while (i) {
1311  const float sample = *src;
1312  if (sample >= 1.0f) {
1313  *dst = 65535;
1314  } else if (sample <= -1.0f) {
1315  *dst = 0;
1316  } else {
1317  *dst = (Uint16)((sample + 1.0f) * 32767.0f);
1318  }
1319  i--; src++; dst++;
1320  }
1321 
1322  cvt->len_cvt /= 2;
1323  if (cvt->filters[++cvt->filter_index]) {
1324  cvt->filters[cvt->filter_index](cvt, AUDIO_U16SYS);
1325  }
1326 }
1327 
1328 static void SDLCALL
1329 SDL_Convert_F32_to_S32_NEON(SDL_AudioCVT *cvt, SDL_AudioFormat format)
1330 {
1331  const float *src = (const float *) cvt->buf;
1332  Sint32 *dst = (Sint32 *) cvt->buf;
1333  int i;
1334 
1335  LOG_DEBUG_CONVERT("AUDIO_F32", "AUDIO_S32 (using NEON)");
1336 
1337  /* Get dst aligned to 16 bytes */
1338  for (i = cvt->len_cvt / sizeof (float); i && (((size_t) dst) & 15); --i, ++src, ++dst) {
1339  const float sample = *src;
1340  if (sample >= 1.0f) {
1341  *dst = 2147483647;
1342  } else if (sample <= -1.0f) {
1343  *dst = (-2147483647) - 1;
1344  } else {
1345  *dst = ((Sint32)(sample * 8388607.0f)) << 8;
1346  }
1347  }
1348 
1349  SDL_assert(!i || ((((size_t) dst) & 15) == 0));
1350  SDL_assert(!i || ((((size_t) src) & 15) == 0));
1351 
1352  {
1353  /* Aligned! Do NEON blocks as long as we have 16 bytes available. */
1354  const float32x4_t one = vdupq_n_f32(1.0f);
1355  const float32x4_t negone = vdupq_n_f32(-1.0f);
1356  const float32x4_t mulby8388607 = vdupq_n_f32(8388607.0f);
1357  int32_t *mmdst = (int32_t *) dst;
1358  while (i >= 4) { /* 4 * float32 */
1359  vst1q_s32(mmdst, vshlq_n_s32(vcvtq_s32_f32(vmulq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(src)), one), mulby8388607)), 8));
1360  i -= 4; src += 4; mmdst += 4;
1361  }
1362  dst = (Sint32 *) mmdst;
1363  }
1364 
1365  /* Finish off any leftovers with scalar operations. */
1366  while (i) {
1367  const float sample = *src;
1368  if (sample >= 1.0f) {
1369  *dst = 2147483647;
1370  } else if (sample <= -1.0f) {
1371  *dst = (-2147483647) - 1;
1372  } else {
1373  *dst = ((Sint32)(sample * 8388607.0f)) << 8;
1374  }
1375  i--; src++; dst++;
1376  }
1377 
1378  if (cvt->filters[++cvt->filter_index]) {
1379  cvt->filters[cvt->filter_index](cvt, AUDIO_S32SYS);
1380  }
1381 }
1382 #endif
1383 
1384 
1385 
1387 {
1388  static SDL_bool converters_chosen = SDL_FALSE;
1389 
1390  if (converters_chosen) {
1391  return;
1392  }
1393 
1394 #define SET_CONVERTER_FUNCS(fntype) \
1395  SDL_Convert_S8_to_F32 = SDL_Convert_S8_to_F32_##fntype; \
1396  SDL_Convert_U8_to_F32 = SDL_Convert_U8_to_F32_##fntype; \
1397  SDL_Convert_S16_to_F32 = SDL_Convert_S16_to_F32_##fntype; \
1398  SDL_Convert_U16_to_F32 = SDL_Convert_U16_to_F32_##fntype; \
1399  SDL_Convert_S32_to_F32 = SDL_Convert_S32_to_F32_##fntype; \
1400  SDL_Convert_F32_to_S8 = SDL_Convert_F32_to_S8_##fntype; \
1401  SDL_Convert_F32_to_U8 = SDL_Convert_F32_to_U8_##fntype; \
1402  SDL_Convert_F32_to_S16 = SDL_Convert_F32_to_S16_##fntype; \
1403  SDL_Convert_F32_to_U16 = SDL_Convert_F32_to_U16_##fntype; \
1404  SDL_Convert_F32_to_S32 = SDL_Convert_F32_to_S32_##fntype; \
1405  converters_chosen = SDL_TRUE
1406 
1407 #if HAVE_SSE2_INTRINSICS
1408  if (SDL_HasSSE2()) {
1409  SET_CONVERTER_FUNCS(SSE2);
1410  return;
1411  }
1412 #endif
1413 
1414 #if HAVE_NEON_INTRINSICS
1415  if (SDL_HasNEON()) {
1416  SET_CONVERTER_FUNCS(NEON);
1417  return;
1418  }
1419 #endif
1420 
1421 #if NEED_SCALAR_CONVERTER_FALLBACKS
1422  SET_CONVERTER_FUNCS(Scalar);
1423 #endif
1424 
1425 #undef SET_CONVERTER_FUNCS
1426 
1427  SDL_assert(converters_chosen == SDL_TRUE);
1428 }
1429 
1430 /* vi: set ts=4 sw=4 expandtab: */
#define LOG_DEBUG_CONVERT(from, to)
Definition: SDL_audio_c.h:34
GLenum GLenum dst
signed int int32_t
Uint8 * buf
Definition: SDL_audio.h:232
static void SDL_Convert_F32_to_S32_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
unsigned short uint16_t
SDL_AudioFilter SDL_Convert_F32_to_U16
SDL_AudioFilter SDL_Convert_F32_to_S16
void SDL_ChooseAudioConverters(void)
int filter_index
Definition: SDL_audio.h:238
SDL_AudioFilter SDL_Convert_U8_to_F32
uint16_t Uint16
Definition: SDL_stdinc.h:191
static void SDL_Convert_S32_to_F32_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
GLfloat f
Uint16 SDL_AudioFormat
Audio format flags.
Definition: SDL_audio.h:64
static const double one
Definition: e_exp.c:83
signed short int16_t
GLenum src
#define AUDIO_S16SYS
Definition: SDL_audio.h:123
SDL_AudioFilter SDL_Convert_F32_to_U8
A structure to hold a set of audio conversion filters and buffers.
Definition: SDL_audio.h:226
static void SDL_Convert_U16_to_F32_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
SDL_AudioFilter SDL_Convert_S16_to_F32
GLint GLint GLsizei GLsizei GLsizei GLint GLenum format
Definition: SDL_opengl.h:1572
static const double zero
Definition: e_atan2.c:44
#define AUDIO_U8
Definition: SDL_audio.h:89
SDL_AudioFilter filters[SDL_AUDIOCVT_MAX_FILTERS+1]
Definition: SDL_audio.h:237
#define AUDIO_F32SYS
Definition: SDL_audio.h:125
static void SDL_Convert_F32_to_U16_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
int8_t Sint8
Definition: SDL_stdinc.h:173
static void SDL_Convert_S16_to_F32_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
uint8_t Uint8
Definition: SDL_stdinc.h:179
unsigned int size_t
#define SDL_HasNEON
static void SDL_Convert_U8_to_F32_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
static void SDL_Convert_S8_to_F32_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
int32_t Sint32
Definition: SDL_stdinc.h:197
static void SDL_Convert_F32_to_S8_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
void(* SDL_AudioFilter)(struct SDL_AudioCVT *cvt, SDL_AudioFormat format)
Definition: SDL_audio.h:193
return Display return Display Bool Bool int int int return Display XEvent Bool(*) XPointer return Display return Display Drawable _Xconst char unsigned int unsigned int return Display Pixmap Pixmap XColor XColor unsigned int unsigned int return Display _Xconst char char int char return Display Visual unsigned int int int char unsigned int unsigned int in i)
Definition: SDL_x11sym.h:50
#define SDL_assert(condition)
Definition: SDL_assert.h:169
#define NULL
Definition: begin_code.h:167
SDL_bool
Definition: SDL_stdinc.h:161
unsigned char uint8_t
signed char int8_t
static void SDL_Convert_F32_to_S16_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
#define SDL_HasSSE2
SDL_AudioFilter SDL_Convert_S8_to_F32
#define AUDIO_S32SYS
Definition: SDL_audio.h:124
SDL_AudioFilter SDL_Convert_F32_to_S32
SDL_AudioFilter SDL_Convert_F32_to_S8
static void SDL_Convert_F32_to_U8_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
#define DIVBY32768
#define DIVBY128
SDL_AudioFilter SDL_Convert_U16_to_F32
GLboolean GLboolean GLboolean GLboolean a
#define AUDIO_S8
Definition: SDL_audio.h:90
SDL_AudioFilter SDL_Convert_S32_to_F32
#define SDLCALL
Definition: SDL_internal.h:49
GLboolean GLboolean GLboolean b
#define SET_CONVERTER_FUNCS(fntype)
#define AUDIO_U16SYS
Definition: SDL_audio.h:122
int16_t Sint16
Definition: SDL_stdinc.h:185
#define DIVBY8388607