TGX 1.0.4
A tiny 2D/3D graphics library optimized for 32 bits microcontrollers.
Loading...
Searching...
No Matches
Misc.h
Go to the documentation of this file.
1
5//
6// Copyright 2020 Arvind Singh
7//
8// This library is free software; you can redistribute it and/or
9// modify it under the terms of the GNU Lesser General Public
10// License as published by the Free Software Foundation; either
11//version 2.1 of the License, or (at your option) any later version.
12//
13// This library is distributed in the hope that it will be useful,
14// but WITHOUT ANY WARRANTY; without even the implied warranty of
15// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.See the GNU
16// Lesser General Public License for more details.
17//
18// You should have received a copy of the GNU Lesser General Public
19// License along with this library; If not, see <http://www.gnu.org/licenses/>.
20
21#ifndef _TGX_MISC_H_
22#define _TGX_MISC_H_
23
24
25#include <stdint.h>
26#include <math.h>
27#include <string.h>
28
29
// The mtools extensions are opt-in: unless the build already defines
// MTOOLS_TGX_EXTENSIONS, default it to 0 (disabled).
#if !defined(MTOOLS_TGX_EXTENSIONS)
    #define MTOOLS_TGX_EXTENSIONS 0
#endif
34
35
36
// Platform configuration. On Teensy, ESP32 and STM32 Arduino builds the Arduino
// core header is pulled in and the options below are enabled; every other target
// gets empty TGX_INLINE / TGX_NOINLINE qualifiers.
#if defined(TEENSYDUINO) || defined(ESP32) || defined(ARDUINO_ARCH_STM32)
    #include "Arduino.h" // include Arduino to get PROGMEM macro and others
    #define TGX_ON_ARDUINO

    #define TGX_USE_FAST_INV_SQRT_TRICK // enables the bit-trick branch of fast_invsqrt(float) (when its Xtensa asm path is not used)
    #define TGX_USE_FAST_SQRT_TRICK // enables the bit-trick branch of fast_sqrt(float)
    //#define TGX_USE_FAST_INV_TRICK // buggy and slower than regular inv anyway...

    // force inlining of the small helpers declared with TGX_INLINE
    #define TGX_INLINE __attribute__((always_inline))
    // forbid inlining/cloning; FLASHMEM is additionally applied (it expands to nothing
    // when the fallback definition further down in this header is the one in effect)
    #define TGX_NOINLINE __attribute__((noinline, noclone)) FLASHMEM
#else
    #define TGX_INLINE
    #define TGX_NOINLINE
#endif
51
// Fallbacks: when PROGMEM / FLASHMEM are not already defined (e.g. Arduino.h was
// not included above), define them as empty so that declarations annotated with
// them still compile.
#if !defined(PROGMEM)
    #define PROGMEM
#endif

#if !defined(FLASHMEM)
    #define FLASHMEM
#endif
59
60
/* Floating point precision selector. When non-zero (the default), DefaultFPType<T>
   resolves to float for every T except double; when 0 it resolves to double for
   every T. Define it before including this header to override the default. */
#if !defined(TGX_SINGLE_PRECISION_COMPUTATIONS)
    #define TGX_SINGLE_PRECISION_COMPUTATIONS 1
#endif
65
66
// Sentinel value (-1.0f).
// NOTE(review): by its name it presumably stands for "no blending requested" when
// passed as an opacity/blending argument - confirm against the callers.
#define TGX_DEFAULT_NO_BLENDING -1.0f
68
69
/* Size of the cache when reading in PROGMEM. This value is used to try to optimize
   cache reads and improve rendering speed when reading large images in flash. */
#if !defined(TEENSYDUINO) && !defined(ESP32)
    // not one of the MCU targets above: a large value can be used on CPU.
    #define TGX_PROGMEM_DEFAULT_CACHE_SIZE 262144
#else
    // on Teensy, 8K gives good results.
    #define TGX_PROGMEM_DEFAULT_CACHE_SIZE 8192
#endif
79
80
// Cast the expression `a` to int32_t. Both the argument and the whole expansion
// are parenthesized so the macro can be used inside larger expressions.
#define TGX_CAST32(a) ((int32_t)(a))
83
84
85// c++, no plain c
86#ifdef __cplusplus
87
// Mark a declaration as deprecated. X must be a string literal: it is concatenated
// with a leading and a trailing space to form the [[deprecated]] message.
#define DEPRECATED(X) [[deprecated(" " X " ")]]
89
90
// Refuse to compile when `int` is narrower than 4 bytes.
static_assert(4 <= sizeof(int), "The TGX library only works on 32 bits or 64 bits architecture. Sorry!");
93
94
95
#if defined(ARDUINO_TEENSY41)

    // Declaration of a variable with C linkage that is defined outside this header.
    // NOTE(review): by its name it holds the size of the external PSRAM (EXTMEM),
    // presumably provided by the Teensy core - confirm.
    extern "C" uint8_t external_psram_size;

    // check if an address is in flash: true when X, converted to uint32_t, lies in
    // [0x60000000, 0x70000000). Note that X is evaluated twice.
    #define TGX_IS_PROGMEM(X) ((((uint32_t)(X)) >= 0x60000000)&&(((uint32_t)(X)) < 0x70000000))

    // check if an address is in external ram: true when X, converted to uint32_t,
    // lies in [0x70000000, 0x80000000). Note that X is evaluated twice.
    #define TGX_IS_EXTMEM(X) ((((uint32_t)(X)) >= 0x70000000)&&(((uint32_t)(X)) < 0x80000000))

#endif
108
// Provide M_PI when <math.h> did not define it.
#if !defined(M_PI)
    #define M_PI 3.14159265358979323846
#endif
112
113
114namespace tgx
115{
116
117
/**
 * Empty class template distinguished only by its integer parameter N.
 *
 * It has no members: different values of N simply yield different types.
 * NOTE(review): presumably used as a tag to select between overloads - confirm
 * against the callers.
 */
template<int N>
struct DummyType
{
};
127
128
/**
 * Empty class template distinguished only by its two boolean parameters.
 *
 * It has no members: each (BB1, BB2) combination yields a different type.
 * NOTE(review): presumably used as a tag to select between overloads - confirm
 * against the callers.
 */
template<bool BB1, bool BB2>
struct DummyTypeBB
{
};
138
139
/**
 * Maps a type T to a floating point type, exposed as the member `fptype`.
 *
 * With TGX_SINGLE_PRECISION_COMPUTATIONS non-zero, `fptype` is float for every T
 * (except double, see the specialization below); otherwise it is double.
 */
template<typename T = int>
struct DefaultFPType
{
#if !TGX_SINGLE_PRECISION_COMPUTATIONS
    using fptype = double;
#else
    using fptype = float;
#endif
};

#if TGX_SINGLE_PRECISION_COMPUTATIONS
/**
 * Specialization for double: `fptype` stays double even in single precision mode.
 */
template<>
struct DefaultFPType<double>
{
    using fptype = double;
};
#endif
157
158
/**
 * Minimal type-equality trait: `value` is false in the general case...
 */
template <typename A, typename B>
struct is_same
{
    static const bool value = false;
};

/**
 * ...and true when both template arguments are the same type.
 */
template <typename A>
struct is_same<A, A>
{
    static const bool value = true;
};
162
163
/**
 * Swap the two bytes of a 16-bit value (little endian / big endian conversion).
 *
 * @param v value to convert.
 * @return v with its low and high bytes exchanged.
 */
TGX_INLINE inline uint16_t BigEndian16(uint16_t v)
{
#if !defined(__GNUC__)
    // portable fallback: move each byte to the other half and merge them
    const uint16_t high = (uint16_t)(v << 8);
    const uint16_t low = (uint16_t)(v >> 8);
    return (uint16_t)(high | low);
#else
    // GCC-compatible compilers provide a dedicated builtin
    return __builtin_bswap16(v);
#endif
}
173
174
/**
 * Exchange the values of a and b.
 *
 * Uses one copy construction and two assignments of T.
 */
template<typename T>
TGX_INLINE inline void swap(T& a, T& b)
{
    T saved(a);
    a = b;
    b = saved;
}
177
178
/**
 * Return the smaller of two values (by copy).
 *
 * b is returned whenever `a < b` is false, in particular when both are equal.
 */
template<typename T>
TGX_INLINE inline T min(const T & a, const T & b)
{
    if (a < b)
    {
        return a;
    }
    return b;
}
181
182
/**
 * Return the larger of two values (by copy).
 *
 * b is returned whenever `a > b` is false, in particular when both are equal.
 */
template<typename T>
TGX_INLINE inline T max(const T & a, const T & b)
{
    if (a > b)
    {
        return a;
    }
    return b;
}
185
186
/**
 * Clamp v to the range [vmin, vmax].
 *
 * The upper bound is applied first and the lower bound second, so vmin wins
 * when the bounds are inverted (vmin > vmax).
 *
 * @param v    value to clamp.
 * @param vmin lower bound.
 * @param vmax upper bound.
 * @return the clamped value (by copy).
 */
template<typename T>
TGX_INLINE inline T clamp(const T & v, const T & vmin, const T & vmax)
{
    // same comparisons, in the same order, as max(vmin, min(vmax, v))
    const T & capped = (vmax < v) ? vmax : v;
    return (vmin > capped) ? vmin : capped;
}
192
193
/**
 * Rounding for floats: forwards to roundf().
 *
 * @param f value to round.
 * @return the result of roundf(f).
 */
TGX_INLINE inline float roundfp(const float f)
{
    const float rounded = roundf(f);
    return rounded;
}
196
197
/**
 * Rounding for doubles: forwards to round().
 *
 * @param f value to round.
 * @return the result of round(f).
 */
TGX_INLINE inline double roundfp(const double f)
{
    const double rounded = round(f);
    return rounded;
}
200
201
202
/**
 * Return a value smaller or equal to B such that the multiplication by A is
 * safe (no overflow with int32).
 *
 * The returned value is min(B, INT32_MAX / |A|). When A or B is zero, B is
 * returned unchanged. Only the upper bound is enforced: a negative B is always
 * returned as is.
 *
 * @param A the multiplier.
 * @param B the value to cap.
 * @return B, possibly reduced so that |A| * result fits in an int32_t.
 */
TGX_INLINE inline int32_t safeMultB(int32_t A, int32_t B)
{
    if ((A == 0) || (B == 0)) return B;
    // |A| is computed in 64 bits: negating INT32_MIN as an int32_t would overflow
    // (undefined behaviour).
    const int64_t absA = (A < 0) ? -((int64_t)A) : (int64_t)A;
    const int32_t nB = (int32_t)(((int64_t)INT32_MAX) / absA);
    return ((B <= nB) ? B : nB);
}
213
214
215
/**
 * Fast (approximate) computation of 1/x for a float.
 *
 * Three implementations are selected at compile time:
 *  - Xtensa with hardware float: inline assembly;
 *  - TGX_USE_FAST_INV_TRICK: square of a bit-trick inverse square root. The
 *    result is a square, hence never negative, so this branch cannot return a
 *    correctly signed value for negative x;
 *  - otherwise: plain division, returning 1.0f when x == 0.
 *
 * @param x value to invert.
 * @return an approximation of 1/x.
 */
TGX_INLINE inline float fast_inv(float x)
{
#if defined(__XTENSA__) && !defined(__XTENSA_SOFT_FLOAT__)
    // 2 NR iterations, error < 1 ULP
    float t, result;
    asm volatile (
        "recip0.s %0, %2\n\t"
        "const.s %1, 1\n\t"
        "msub.s %1, %2, %0\n\t"
        "madd.s %0, %0, %1\n\t"
        "const.s %1, 1\n\t"
        "msub.s %1, %2, %0\n\t"
        "maddn.s %0, %0, %1"
        : "=&f" (result),
          "=&f" (t)
        : "f" (x)
        );
    return result;
#elif defined (TGX_USE_FAST_INV_TRICK)
    // reinterpret the float bits to build the initial inverse square root estimate
    union
    {
        float f;
        uint32_t u;
    } bits;
    bits.f = x;
    bits.u = 0x5f375a86 - (bits.u >> 1); // slightly more precise than the original 0x5f3759df
    const float half_x = x * 0.5f;
    const float y0 = bits.f;
    // a single Newton-Raphson iteration (a second one is not needed)
    const float y1 = y0 * (1.5f - (half_x * y0 * y0));
    // (1/sqrt(x))^2 = 1/x
    return y1 * y1;
#else
    if (x == 0)
    {
        return 1.0f;
    }
    return 1.0f / x;
#endif
}
254
255
/**
 * Computation of 1/x for a double.
 *
 * No fast approximation is used for the double type: this is a plain division,
 * except that 1.0 is returned when x == 0.
 *
 * @param x value to invert.
 * @return 1/x, or 1.0 when x is zero.
 */
TGX_INLINE inline double fast_inv(double x)
{
    if (x == 0)
    {
        return 1.0;
    }
    return 1.0 / x;
}
264
265
266
/**
 * Compute the square root of a float (exact computation): forwards to sqrtf().
 *
 * @param x input value.
 * @return the result of sqrtf(x).
 */
TGX_INLINE inline float precise_sqrt(float x)
{
    const float root = sqrtf(x);
    return root;
}
274
275
/**
 * Compute the square root of a double (exact computation): forwards to sqrt().
 *
 * @param x input value.
 * @return the result of sqrt(x).
 */
TGX_INLINE inline double precise_sqrt(double x)
{
    const double root = sqrt(x);
    return root;
}
283
284
288 TGX_INLINE inline float fast_sqrt(float x)
289 {
290#if defined (TGX_USE_FAST_SQRT_TRICK)
291 union
292 {
293 float f;
294 uint32_t u;
295 } v;
296 v.f = x;
297 v.u = 0x5f375a86 - (v.u >> 1); // slightly more precise than the original 0x5f3759df
298 const float x2 = x * 0.5f;
299 const float threehalfs = 1.5f;
300 v.f = v.f * (threehalfs - (x2 * v.f * v.f)); // 1st iteration
301// v.f = v.f * (threehalfs - (x2 * v.f * v.f)); // 2nd iteration (not needed)
302 return x * v.f;
303#else
304 return precise_sqrt(x);
305#endif
306 }
307
308
312 TGX_INLINE inline double fast_sqrt(double x)
313 {
314 // do not use fast approximation for double type.
315 return precise_sqrt(x);
316 }
317
318
/**
 * Compute the inverse square root of a float (exact computation).
 *
 * On Xtensa with hardware float an inline assembly sequence is used; elsewhere
 * the result is 1 / sqrtf(x), except that 1.0f is returned when sqrtf(x) == 0.
 *
 * @param x input value.
 * @return 1/sqrt(x).
 */
TGX_INLINE inline float precise_invsqrt(float x)
{
#if defined(__XTENSA__) && !defined(__XTENSA_SOFT_FLOAT__)
    // 2 NR iterations, error < 2 ULP
    float t0, t1, t2, t3, result;
    asm volatile (
        "rsqrt0.s %0, %5\n\t"
        "mul.s %1, %5, %0\n\t"
        "const.s %2, 3\n\t"
        "mul.s %3, %2, %0\n\t"
        "const.s %4, 1\n\t"
        "msub.s %4, %1, %0\n\t"
        "madd.s %0, %3, %4\n\t"
        "mul.s %1, %5, %0\n\t"
        "mul.s %3, %2, %0\n\t"
        "const.s %4, 1\n\t"
        "msub.s %4, %1, %0\n\t"
        "maddn.s %0, %3, %4"
        : "=&f" (result),
          "=&f" (t0),
          "=&f" (t1),
          "=&f" (t2),
          "=&f" (t3)
        : "f" (x)
        );
    return result;
#else
    const float root = sqrtf(x);
    if (root == 0)
    {
        // avoid dividing by zero
        return 1.0f;
    }
    return 1.0f / root;
#endif
}
353
354
/**
 * Compute the inverse square root of a double (exact computation).
 *
 * The result is 1 / sqrt(x), except that 1.0 is returned when sqrt(x) == 0,
 * mirroring the float overload.
 *
 * @param x input value.
 * @return 1/sqrt(x), or 1.0 when sqrt(x) is zero.
 */
TGX_INLINE inline double precise_invsqrt(double x)
{
    const double s = sqrt(x);
    // divide by s itself: taking the square root a second time would yield x^(-1/4)
    return (s == 0) ? 1.0 : (1.0 / s);
}
363
364
368 TGX_INLINE inline float fast_invsqrt(float x)
369 {
370#if defined(__XTENSA__) && !defined(__XTENSA_SOFT_FLOAT__)
371 // 1 NR iteration, error < 728 ULP
372 float t0, t1, t2, t3, result;
373 asm volatile (
374 "rsqrt0.s %0, %5\n\t"
375 "mul.s %1, %5, %0\n\t"
376 "const.s %2, 3\n\t"
377 "mul.s %3, %2, %0\n\t"
378 "const.s %4, 1\n\t"
379 "msub.s %4, %1, %0\n\t"
380 "maddn.s %0, %3, %4"
381 : "=&f" (result),
382 "=&f" (t0),
383 "=&f" (t1),
384 "=&f" (t2),
385 "=&f" (t3)
386 : "f" (x)
387 );
388 return result;
389#elif defined (TGX_USE_FAST_INV_SQRT_TRICK)
390 // fast reciprocal square root : https://en.wikipedia.org/wiki/Fast_inverse_square_root
391 //github.com/JarkkoPFC/meshlete/blob/master/src/core/math/fast_math.inl
392 union
393 {
394 float f;
395 uint32_t u;
396 } v;
397 v.f = x;
398 v.u = 0x5f375a86 - (v.u >> 1); // slightly more precise than the original 0x5f3759df
399 const float x2 = x * 0.5f;
400 const float threehalfs = 1.5f;
401 v.f = v.f * (threehalfs - (x2 * v.f * v.f)); // 1st iteration
402// v.f = v.f * (threehalfs - (x2 * v.f * v.f)); // 2nd iteration (not needed)
403 return v.f;
404#else
405 return precise_invsqrt(x);
406#endif
407 }
408
409
413 TGX_INLINE inline double fast_invsqrt(double x)
414 {
415 // do not use fast approximation for double type.
416 return precise_invsqrt(x);
417 }
418
419
/**
 * Compute (int32_t)floorf(x).
 *
 * On Xtensa with hardware float a single `floor.s` instruction is issued via
 * inline assembly; elsewhere floorf() is called and its result cast to int32_t.
 *
 * @param x input value.
 * @return the floor of x as a 32-bit signed integer.
 */
TGX_INLINE inline int32_t lfloorf(float x)
{
#if defined(__XTENSA__) && !defined(__XTENSA_SOFT_FLOAT__)
    uint32_t result;
    asm volatile (
        "floor.s %0, %1, 0"
        : "=a" (result)
        : "f" (x)
        );
    return result;
#else
    const float floored = floorf(x);
    return (int32_t)floored;
#endif
}
437}
438
439#endif
440
441#endif
442
443
445
TGX_INLINE int32_t lfloorf(float x)
Compute (int32_t)floorf(x).
Definition Misc.h:423
TGX_INLINE T min(const T &a, const T &b)
Don't know why but faster than fminf() for floats.
Definition Misc.h:180
TGX_INLINE T max(const T &a, const T &b)
Don't know why but much faster than fmaxf() for floats.
Definition Misc.h:184
TGX_INLINE T clamp(const T &v, const T &vmin, const T &vmax)
Template clamp version.
Definition Misc.h:188
TGX_INLINE void swap(T &a, T &b)
Baby let me swap you one more time...
Definition Misc.h:176
TGX_INLINE int32_t safeMultB(int32_t A, int32_t B)
Return a value smaller or equal to B such that the multiplication by A is safe (no overflow with int32).
Definition Misc.h:206
TGX_INLINE float fast_invsqrt(float x)
Compute a fast approximation of the inverse square root of a float.
Definition Misc.h:368
TGX_INLINE float precise_sqrt(float x)
Compute the square root of a float (exact computation).
Definition Misc.h:270
TGX_INLINE float roundfp(const float f)
Rounding for floats.
Definition Misc.h:195
TGX_INLINE float precise_invsqrt(float x)
Compute the inverse square root of a float (exact computation).
Definition Misc.h:322
TGX_INLINE float fast_sqrt(float x)
Compute a fast approximation of the square root of a float.
Definition Misc.h:288
TGX_INLINE float fast_inv(float x)
Fast (approximate) computation of 1/x.
Definition Misc.h:219
TGX_INLINE uint16_t BigEndian16(uint16_t v)
little endian / big endian conversion
Definition Misc.h:165