Adafruit mp3
assembly.h
/* ***** BEGIN LICENSE BLOCK *****
 * Version: RCSL 1.0/RPSL 1.0
 *
 * Portions Copyright (c) 1995-2002 RealNetworks, Inc. All Rights Reserved.
 *
 * The contents of this file, and the files included with this file, are
 * subject to the current version of the RealNetworks Public Source License
 * Version 1.0 (the "RPSL") available at
 * http://www.helixcommunity.org/content/rpsl unless you have licensed
 * the file under the RealNetworks Community Source License Version 1.0
 * (the "RCSL") available at http://www.helixcommunity.org/content/rcsl,
 * in which case the RCSL will apply. You may also obtain the license terms
 * directly from RealNetworks. You may not use this file except in
 * compliance with the RPSL or, if you have a valid RCSL with RealNetworks
 * applicable to this file, the RCSL. Please see the applicable RPSL or
 * RCSL for the rights, obligations and limitations governing use of the
 * contents of the file.
 *
 * This file is part of the Helix DNA Technology. RealNetworks is the
 * developer of the Original Code and owns the copyrights in the portions
 * it created.
 *
 * This file, and the files included with this file, is distributed and made
 * available on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND REALNETWORKS HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, FITNESS
 * FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 *
 * Technology Compatibility Kit Test Suite(s) Location:
 * http://www.helixcommunity.org/content/tck
 *
 * Contributor(s):
 *
 * ***** END LICENSE BLOCK ***** */

/**************************************************************************************
 * Fixed-point MP3 decoder
 * Jon Recker (jrecker@real.com), Ken Cooke (kenc@real.com)
 * June 2003
 *
 * assembly.h - assembly language functions and prototypes for supported platforms
 *
 * - inline routines with access to 64-bit multiply results
 * - x86 (_WIN32), ARM (ARM_ADS, _WIN32_WCE, GCC), AVR32, and Cortex-M4 versions included
 * - some inline functions are a mix of asm and C for speed
 * - some functions are in native asm files, so only the prototype is given here
 *
 * MULSHIFT32(x, y)  signed multiply of two 32-bit integers (x and y), returns top 32 bits of 64-bit result
 * FASTABS(x)        branchless absolute value of signed integer x
 * CLZ(x)            count leading zeros in x
 * MADD64(sum, x, y) sum [64-bit] += x [32-bit] * y [32-bit] (not available on all platforms)
 * SHL64(x, n)       64-bit left shift using __int64 (Windows only)
 * SAR64(x, n)       64-bit arithmetic right shift (not available on all platforms)
 */

#ifndef _ASSEMBLY_H
#define _ASSEMBLY_H

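/* Portable reference models (illustrative only, not part of the original Helix
 * source; the *_REF names are hypothetical). These show in plain C what the
 * platform-specific versions below compute, which is handy when porting:
 */
static __inline int MULSHIFT32_REF(int x, int y)
{
    /* top 32 bits of the signed 64-bit product */
    return (int)(((long long)x * (long long)y) >> 32);
}

static __inline long long MADD64_REF(long long sum, int x, int y)
{
    /* 64-bit accumulate of a full 32x32 signed multiply */
    return sum + (long long)x * (long long)y;
}
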
#if (defined _WIN32 && !defined _WIN32_WCE) || (defined __WINS__ && defined _SYMBIAN) || defined(_OPENWAVE_SIMULATOR) || defined(WINCE_EMULATOR) /* Symbian emulator for Ix86 */

#pragma warning( disable : 4035 ) /* complains about inline asm not returning a value */

static __inline int MULSHIFT32(int x, int y)
{
    __asm {
        mov eax, x
        imul y
        mov eax, edx
    }
}

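/* Usage sketch (illustrative, not from the original source): with Q28
 * fixed-point operands the implied >> 32 leaves a Q24 result, e.g.
 *
 *     int a = (int)(0.5  * (1 << 28));   // 0.5 in Q28
 *     int b = (int)(0.25 * (1 << 28));   // 0.25 in Q28
 *     int p = MULSHIFT32(a, b);          // 0.125 in Q24 (28 + 28 - 32 = 24)
 */
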
static __inline int FASTABS(int x)
{
    int sign;

    sign = x >> (sizeof(int) * 8 - 1);
    x ^= sign;
    x -= sign;

    return x;
}
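
/* Why this works: sign is 0 for x >= 0 and -1 (all ones) for x < 0, so the
 * xor/subtract pair is a no-op for non-negative x and computes ~x + 1 = -x
 * for negative x. Like abs(), FASTABS(INT_MIN) still overflows. */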

static __inline int CLZ(int x)
{
    int numZeros;

    if (!x)
        return (sizeof(int) * 8);

    numZeros = 0;
    while (!(x & 0x80000000)) {
        numZeros++;
        x <<= 1;
    }

    return numZeros;
}
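
/* Example (illustrative): CLZ(0x00010000) = 15, CLZ(1) = 31, and CLZ(0) = 32
 * by the guard above. This is the usual primitive for normalizing fixed-point
 * values before a multiply. */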

/* MADD64, SHL64, SAR64:
 * written in assembly to avoid a dependency on the run-time library for
 * 64-bit shifts and multiplies (the compiler sometimes thunks to function
 * calls instead of generating code inline); required for the Symbian emulator
 */
#ifdef __CW32__
typedef long long Word64;
#else
typedef __int64 Word64;
#endif

static __inline Word64 MADD64(Word64 sum, int x, int y)
{
    unsigned int sumLo = ((unsigned int *)&sum)[0];
    int sumHi = ((int *)&sum)[1];

    __asm {
        mov eax, x
        imul y
        add eax, sumLo
        adc edx, sumHi
    }

    /* equivalent to return (sum + ((__int64)x * y)); */
}
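
/* Note: MSVC on x86 returns a 32-bit int in EAX and a 64-bit __int64 in
 * EDX:EAX, so the inline asm here (and in MULSHIFT32 above) leaves its result
 * in the return registers and simply falls off the end of the function; that
 * is why warning C4035 is disabled at the top of this section. */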

static __inline Word64 SHL64(Word64 x, int n)
{
    unsigned int xLo = ((unsigned int *)&x)[0];
    int xHi = ((int *)&x)[1];
    unsigned char nb = (unsigned char)n;

    if (n < 32) {
        __asm {
            mov edx, xHi
            mov eax, xLo
            mov cl, nb
            shld edx, eax, cl
            shl eax, cl
        }
    } else if (n < 64) {
        /* shl masks cl to 0x1f */
        __asm {
            mov edx, xLo
            mov cl, nb
            xor eax, eax
            shl edx, cl
        }
    } else {
        __asm {
            xor edx, edx
            xor eax, eax
        }
    }
}

static __inline Word64 SAR64(Word64 x, int n)
{
    unsigned int xLo = ((unsigned int *)&x)[0];
    int xHi = ((int *)&x)[1];
    unsigned char nb = (unsigned char)n;

    if (n < 32) {
        __asm {
            mov edx, xHi
            mov eax, xLo
            mov cl, nb
            shrd eax, edx, cl
            sar edx, cl
        }
    } else if (n < 64) {
        /* sar masks cl to 0x1f */
        __asm {
            mov edx, xHi
            mov eax, xHi
            mov cl, nb
            sar edx, 31
            sar eax, cl
        }
    } else {
        __asm {
            sar xHi, 31
            mov eax, xHi
            mov edx, xHi
        }
    }
}
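
/* Example (illustrative): SAR64((Word64)1 << 40, 8) == (Word64)1 << 32. The
 * separate n >= 32 branches are needed because x86 32-bit shift instructions
 * mask the count in CL to 5 bits (hence the "masks cl to 0x1f" comments). */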

#elif (defined _WIN32) && (defined _WIN32_WCE)

/* use an asm function for now (EVC++ 3.0 does a horrible job of compiling the __int64 version) */
#define MULSHIFT32 xmp3_MULSHIFT32
int MULSHIFT32(int x, int y);

static __inline int FASTABS(int x)
{
    int sign;

    sign = x >> (sizeof(int) * 8 - 1);
    x ^= sign;
    x -= sign;

    return x;
}

static __inline int CLZ(int x)
{
    int numZeros;

    if (!x)
        return (sizeof(int) * 8);

    numZeros = 0;
    while (!(x & 0x80000000)) {
        numZeros++;
        x <<= 1;
    }

    return numZeros;
}

#elif defined ARM_ADS

static __inline int MULSHIFT32(int x, int y)
{
    /* important rules for smull RdLo, RdHi, Rm, Rs:
     * RdHi and Rm can't be the same register
     * RdLo and Rm can't be the same register
     * RdHi and RdLo can't be the same register
     * Note: Rs determines early termination (leading sign bits) so if you want to specify
     *   which operand is Rs, put it in the SECOND argument (y)
     * For inline assembly, x and y are not assumed to be R0, R1 so it shouldn't matter
     *   which one is returned. (If this were a function call, returning y (R1) would
     *   require an extra "mov r0, r1")
     */
    int zlow;
    __asm {
        smull zlow, y, x, y
    }

    return y;
}

static __inline int FASTABS(int x)
{
    int t = 0; /* initialization is only needed to silence a compiler warning */

    __asm {
        eor t, x, x, asr #31
        sub t, t, x, asr #31
    }

    return t;
}

static __inline int CLZ(int x)
{
    int numZeros;

    if (!x)
        return (sizeof(int) * 8);

    numZeros = 0;
    while (!(x & 0x80000000)) {
        numZeros++;
        x <<= 1;
    }

    return numZeros;
}

#elif defined(__GNUC__) && defined(ARM)

static __inline int MULSHIFT32(int x, int y)
{
    /* important rules for smull RdLo, RdHi, Rm, Rs:
     * RdHi and Rm can't be the same register
     * RdLo and Rm can't be the same register
     * RdHi and RdLo can't be the same register
     * Note: Rs determines early termination (leading sign bits) so if you want to specify
     *   which operand is Rs, put it in the SECOND argument (y)
     * For inline assembly, x and y are not assumed to be R0, R1 so it shouldn't matter
     *   which one is returned. (If this were a function call, returning y (R1) would
     *   require an extra "mov r0, r1")
     */
    int zlow;
    __asm__ volatile ("smull %0,%1,%2,%3" : "=&r" (zlow), "=r" (y) : "r" (x), "1" (y));

    return y;
}
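
/* Note on the constraints: "=&r" marks zlow as early-clobber so GCC won't
 * allocate it to the same register as x or y (honoring the smull register
 * restrictions above), and the "1" input constraint ties y's input to output
 * operand 1, so the high 32 bits of the product land back in y. */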

static __inline int FASTABS(int x)
{
    int t = 0; /* initialization is only needed to silence a compiler warning */

    __asm__ volatile (
        "eor %0,%2,%2, asr #31;"
        "sub %0,%1,%2, asr #31;"
        : "=&r" (t)
        : "0" (t), "r" (x)
    );

    return t;
}

static __inline int CLZ(int x)
{
    int numZeros;

    if (!x)
        return (sizeof(int) * 8);

    numZeros = 0;
    while (!(x & 0x80000000)) {
        numZeros++;
        x <<= 1;
    }

    return numZeros;
}

#elif defined(__GNUC__) && defined(__AVR32_UC__)

typedef signed long long int Word64; /* 64-bit signed integer */

__attribute__((__always_inline__)) static __inline int MULSHIFT32(int x, int y)
{
    signed long long int s64Tmp;
    __asm__ __volatile__( "muls.d %0, %1, %2"
                          : "=r" (s64Tmp)
                          : "r" (x), "r" (y) );
    return (int)(s64Tmp >> 32);
}

__attribute__((__always_inline__)) static __inline int FASTABS(int x)
{
    int tmp;
    /* abs Rd computes Rd = |Rd|, so tie the x input to the output register */
    __asm__ __volatile__( "abs %0"
                          : "=r" (tmp)
                          : "0" (x) );
    return tmp;
}

__attribute__((__always_inline__)) static __inline int CLZ(int x)
{
    int tmp;
    __asm__ __volatile__( "clz %0,%1"
                          : "=r" (tmp)
                          : "r" (x) );
    return tmp;
}

/* MADD64, SAR64:
 * written in assembly to avoid a dependency on the run-time library for
 * 64-bit shifts and multiplies (the compiler sometimes emits function calls
 * instead of generating the code inline)
 */
__attribute__((__always_inline__)) static __inline Word64 MADD64(Word64 sum, int x, int y)
{
    __asm__ __volatile__( "macs.d %0, %1, %2"
                          : "+r" (sum)
                          : "r" (x), "r" (y) );
    return sum;
}

__attribute__((__always_inline__)) static __inline Word64 SAR64(Word64 x, int n)
{
    unsigned int xLo = (unsigned int) x;
    int xHi = (int) (x >> 32);
    int nComp = 32 - n;
    int tmp;
    /* shortcut: n is always < 32 */
    __asm__ __volatile__( "lsl %2, %0, %3\n\t"   // tmp <- xHi << (32-n)
                          "asr %0, %0, %4\n\t"   // xHi <- xHi >> n
                          "lsr %1, %1, %4\n\t"   // xLo <- xLo >> n
                          "or %1, %2\n\t"        // xLo <- xLo | tmp
                          : "+&r" (xHi), "+r" (xLo), "=&r" (tmp)
                          : "r" (nComp), "r" (n) );
    x = xLo | ((Word64)xHi << 32);
    return x;
}

#elif defined(__CORTEX_M) && __CORTEX_M == 0x04U

/* ARM Cortex-M4 */

typedef signed long long int Word64; /* 64-bit signed integer */

static __inline int MULSHIFT32(int x, int y)
{
    /* important rules for smull RdLo, RdHi, Rm, Rs:
     * RdHi and Rm can't be the same register
     * RdLo and Rm can't be the same register
     * RdHi and RdLo can't be the same register
     * Note: Rs determines early termination (leading sign bits) so if you want to specify
     *   which operand is Rs, put it in the SECOND argument (y)
     * For inline assembly, x and y are not assumed to be R0, R1 so it shouldn't matter
     *   which one is returned. (If this were a function call, returning y (R1) would
     *   require an extra "mov r0, r1")
     */
    int zlow;
    __asm__ volatile ("smull %0,%1,%2,%3" : "=&r" (zlow), "=r" (y) : "r" (x), "1" (y));

    return y;
}

static __inline int FASTABS(int x)
{
    int sign;

    sign = x >> (sizeof(int) * 8 - 1);
    x ^= sign;
    x -= sign;

    return x;
}

static __inline int CLZ(int x)
{
    return __CLZ(x); /* CMSIS intrinsic; maps to the hardware CLZ instruction */
}

typedef union _U64 {
    Word64 w64;
    struct {
        /* Cortex-M is little endian */
        unsigned int lo32;
        signed int hi32;
    } r;
} U64;

static __inline Word64 MADD64(Word64 sum64, int x, int y)
{
    U64 u;
    u.w64 = sum64;

    __asm__ volatile ("smlal %0,%1,%2,%3" : "+&r" (u.r.lo32), "+&r" (u.r.hi32) : "r" (x), "r" (y) : "cc");

    return u.w64;
}
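
/* Hypothetical usage sketch (not part of the original source): MADD64 is the
 * classic multiply-accumulate primitive, e.g. for a 64-bit dot product: */
static __inline Word64 DOT64_EXAMPLE(const int *a, const int *b, int n)
{
    Word64 acc = 0;
    while (n--)
        acc = MADD64(acc, *a++, *b++);
    return acc;
}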

__attribute__((__always_inline__)) static __inline Word64 SAR64(Word64 x, int n)
{
    unsigned int xLo = (unsigned int) x;
    int xHi = (int) (x >> 32);
    int nComp = 32 - n;
    int tmp;
    /* shortcut: n is always < 32 */
    __asm__ __volatile__( "lsl %2, %0, %3\n\t"   // tmp <- xHi << (32-n)
                          "asr %0, %0, %4\n\t"   // xHi <- xHi >> n
                          "lsr %1, %1, %4\n\t"   // xLo <- xLo >> n
                          "orr %1, %2\n\t"       // xLo <- xLo | tmp
                          : "+&r" (xHi), "+r" (xLo), "=&r" (tmp)
                          : "r" (nComp), "r" (n) );
    x = xLo | ((Word64)xHi << 32);
    return x;
}

// end Cortex-M4

#else

#error Unsupported platform in assembly.h

#endif /* platforms */

#endif /* _ASSEMBLY_H */