Diffstat (limited to 'lib/lib8tion/math8.h')
-rw-r--r--  lib/lib8tion/math8.h  552
1 file changed, 552 insertions, 0 deletions
diff --git a/lib/lib8tion/math8.h b/lib/lib8tion/math8.h
new file mode 100644
index 000000000..8c6b6c227
--- /dev/null
+++ b/lib/lib8tion/math8.h
@@ -0,0 +1,552 @@
#ifndef __INC_LIB8TION_MATH_H
#define __INC_LIB8TION_MATH_H

#include "scale8.h"

///@ingroup lib8tion

///@defgroup Math Basic math operations
/// Fast, efficient 8-bit math functions specifically
/// designed for high-performance LED programming.
///
/// Because of the AVR (Arduino) and ARM assembly language
/// implementations provided, using these functions often
/// results in smaller and faster code than the equivalent
/// program using plain "C" arithmetic and logic.
///@{


/// add one byte to another, saturating at 0xFF
/// @param i - first byte to add
/// @param j - second byte to add
/// @returns the sum of i & j, capped at 0xFF
LIB8STATIC_ALWAYS_INLINE uint8_t qadd8( uint8_t i, uint8_t j)
{
#if QADD8_C == 1
    uint16_t t = i + j;
    if (t > 255) t = 255;
    return t;
#elif QADD8_AVRASM == 1
    asm volatile(
        /* First, add j to i, conditioning the C flag */
        "add %0, %1    \n\t"

        /* Now test the C flag.
           If C is clear, we branch around a load of 0xFF into i.
           If C is set, we go ahead and load 0xFF into i.
         */
        "brcc L_%=     \n\t"
        "ldi %0, 0xFF  \n\t"
        "L_%=: "
        : "+a" (i)
        : "a" (j) );
    return i;
#elif QADD8_ARM_DSP_ASM == 1
    asm volatile( "uqadd8 %0, %0, %1" : "+r" (i) : "r" (j));
    return i;
#else
#error "No implementation for qadd8 available."
#endif
}
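
// Worked example (illustrative comment, not part of the original source):
//   qadd8( 200, 100) == 255;  the true sum, 300, is capped at 0xFF.
//   qadd8( 100, 100) == 200;  no saturation needed.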

/// Add one byte to another, saturating at 0x7F
/// @param i - first byte to add
/// @param j - second byte to add
/// @returns the sum of i & j, capped at 0x7F
LIB8STATIC_ALWAYS_INLINE int8_t qadd7( int8_t i, int8_t j)
{
#if QADD7_C == 1
    int16_t t = i + j;
    if (t > 127) t = 127;
    return t;
#elif QADD7_AVRASM == 1
    asm volatile(
        /* First, add j to i, conditioning the V flag */
        "add %0, %1    \n\t"

        /* Now test the V flag.
           If V is clear, we branch around a load of 0x7F into i.
           If V is set, we go ahead and load 0x7F into i.
         */
        "brvc L_%=     \n\t"
        "ldi %0, 0x7F  \n\t"
        "L_%=: "
        : "+a" (i)
        : "a" (j) );

    return i;
#elif QADD7_ARM_DSP_ASM == 1
    asm volatile( "qadd8 %0, %0, %1" : "+r" (i) : "r" (j));
    return i;
#else
#error "No implementation for qadd7 available."
#endif
}
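
// Worked example (illustrative comment, not part of the original source):
//   qadd7( 100, 100) == 127;  the true sum, 200, is capped at 0x7F.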

/// subtract one byte from another, saturating at 0x00
/// @returns i - j with a floor of 0
LIB8STATIC_ALWAYS_INLINE uint8_t qsub8( uint8_t i, uint8_t j)
{
#if QSUB8_C == 1
    int16_t t = i - j;
    if (t < 0) t = 0;
    return t;
#elif QSUB8_AVRASM == 1

    asm volatile(
        /* First, subtract j from i, conditioning the C flag */
        "sub %0, %1    \n\t"

        /* Now test the C flag.
           If C is clear, we branch around a load of 0x00 into i.
           If C is set, we go ahead and load 0x00 into i.
         */
        "brcc L_%=     \n\t"
        "ldi %0, 0x00  \n\t"
        "L_%=: "
        : "+a" (i)
        : "a" (j) );

    return i;
#else
#error "No implementation for qsub8 available."
#endif
}
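
// Worked example (illustrative comment, not part of the original source):
//   qsub8( 10, 50) == 0;  the true difference, -40, is floored at 0x00.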

/// add one byte to another, with one byte result
LIB8STATIC_ALWAYS_INLINE uint8_t add8( uint8_t i, uint8_t j)
{
#if ADD8_C == 1
    uint16_t t = i + j;
    return t;
#elif ADD8_AVRASM == 1
    // Add j to i, period.
    asm volatile( "add %0, %1" : "+a" (i) : "a" (j));
    return i;
#else
#error "No implementation for add8 available."
#endif
}

/// add one byte to a two-byte value, with a two-byte result
LIB8STATIC_ALWAYS_INLINE uint16_t add8to16( uint8_t i, uint16_t j)
{
#if ADD8_C == 1
    uint16_t t = i + j;
    return t;
#elif ADD8_AVRASM == 1
    // Add i(one byte) to j(two bytes)
    asm volatile( "add %A[j], %[i]          \n\t"
                  "adc %B[j], __zero_reg__  \n\t"
                  : [j] "+a" (j)
                  : [i] "a" (i)
                  );
    return j;
#else
#error "No implementation for add8to16 available."
#endif
}
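
// Worked example (illustrative comment, not part of the original source):
//   add8to16( 1, 0x00FF) == 0x0100;  the single byte is added into the 16-bit value.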


/// subtract one byte from another, 8-bit result
LIB8STATIC_ALWAYS_INLINE uint8_t sub8( uint8_t i, uint8_t j)
{
#if SUB8_C == 1
    int16_t t = i - j;
    return t;
#elif SUB8_AVRASM == 1
    // Subtract j from i, period.
    asm volatile( "sub %0, %1" : "+a" (i) : "a" (j));
    return i;
#else
#error "No implementation for sub8 available."
#endif
}

/// Calculate an integer average of two unsigned
/// 8-bit integer values (uint8_t).
/// Fractional results are rounded down, e.g. avg8(20,41) = 30
LIB8STATIC_ALWAYS_INLINE uint8_t avg8( uint8_t i, uint8_t j)
{
#if AVG8_C == 1
    return (i + j) >> 1;
#elif AVG8_AVRASM == 1
    asm volatile(
        /* First, add j to i, 9th bit overflows into C flag */
        "add %0, %1    \n\t"
        /* Divide by two, moving C flag into high 8th bit */
        "ror %0        \n\t"
        : "+a" (i)
        : "a" (j) );
    return i;
#else
#error "No implementation for avg8 available."
#endif
}

/// Calculate an integer average of two unsigned
/// 16-bit integer values (uint16_t).
/// Fractional results are rounded down, e.g. avg16(20,41) = 30
LIB8STATIC_ALWAYS_INLINE uint16_t avg16( uint16_t i, uint16_t j)
{
#if AVG16_C == 1
    return (uint32_t)((uint32_t)(i) + (uint32_t)(j)) >> 1;
#elif AVG16_AVRASM == 1
    asm volatile(
        /* First, add jLo (heh) to iLo, 9th bit overflows into C flag */
        "add %A[i], %A[j]    \n\t"
        /* Now, add C + jHi to iHi, 17th bit overflows into C flag */
        "adc %B[i], %B[j]    \n\t"
        /* Divide iHi by two, moving C flag into high 16th bit, old 9th bit now in C */
        "ror %B[i]           \n\t"
        /* Divide iLo by two, moving C flag into high 8th bit */
        "ror %A[i]           \n\t"
        : [i] "+a" (i)
        : [j] "a" (j) );
    return i;
#else
#error "No implementation for avg16 available."
#endif
}


/// Calculate an integer average of two signed 7-bit
/// integers (int8_t).
/// If the first argument is even, the result is rounded down.
/// If the first argument is odd, the result is rounded up.
LIB8STATIC_ALWAYS_INLINE int8_t avg7( int8_t i, int8_t j)
{
#if AVG7_C == 1
    return ((i + j) >> 1) + (i & 0x1);
#elif AVG7_AVRASM == 1
    asm volatile(
        "asr %1        \n\t"
        "asr %0        \n\t"
        "adc %0, %1    \n\t"
        : "+a" (i)
        : "a" (j) );
    return i;
#else
#error "No implementation for avg7 available."
#endif
}
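
// Worked examples of the rounding rule (illustrative comment, not part of the original source):
//   avg7( 5, 10) == 8;  first argument odd, so 7.5 rounds up.
//   avg7( 4, 11) == 7;  first argument even, so 7.5 rounds down.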

/// Calculate an integer average of two signed 15-bit
/// integers (int16_t).
/// If the first argument is even, the result is rounded down.
/// If the first argument is odd, the result is rounded up.
LIB8STATIC_ALWAYS_INLINE int16_t avg15( int16_t i, int16_t j)
{
#if AVG15_C == 1
    return ((int32_t)((int32_t)(i) + (int32_t)(j)) >> 1) + (i & 0x1);
#elif AVG15_AVRASM == 1
    asm volatile(
        /* first divide j by 2, throwing away lowest bit */
        "asr %B[j]          \n\t"
        "ror %A[j]          \n\t"
        /* now divide i by 2, with lowest bit going into C */
        "asr %B[i]          \n\t"
        "ror %A[i]          \n\t"
        /* add j + C to i */
        "adc %A[i], %A[j]   \n\t"
        "adc %B[i], %B[j]   \n\t"
        : [i] "+a" (i)
        : [j] "a" (j) );
    return i;
#else
#error "No implementation for avg15 available."
#endif
}
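
// Worked examples of the rounding rule (illustrative comment, not part of the original source):
//   avg15( 301, 100) == 201;  first argument odd, so 200.5 rounds up.
//   avg15( 300, 101) == 200;  first argument even, so 200.5 rounds down.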


/// Calculate the remainder of one unsigned 8-bit
/// value divided by another, aka A % M.
/// Implemented by repeated subtraction, which is
/// very compact, and very fast if A is 'probably'
/// less than M. If A is a large multiple of M,
/// the loop has to execute multiple times. However,
/// even in that case, the loop is only two
/// instructions long on AVR, i.e., quick.
LIB8STATIC_ALWAYS_INLINE uint8_t mod8( uint8_t a, uint8_t m)
{
#if defined(__AVR__)
    asm volatile (
        "L_%=:  sub %[a],%[m]    \n\t"
        "       brcc L_%=        \n\t"
        "       add %[a],%[m]    \n\t"
        : [a] "+r" (a)
        : [m] "r" (m)
        );
#else
    while( a >= m) a -= m;
#endif
    return a;
}
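
// Worked example (illustrative comment, not part of the original source):
//   mod8( 12, 5) == 2;  the loop subtracts 5 until the value drops below 5,
//   then adds 5 back once to undo the final, overshooting subtraction.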

/// Add two numbers, and calculate the modulo
/// of the sum and a third number, M.
/// In other words, it returns (A+B) % M.
/// It is designed as a compact mechanism for
/// incrementing a 'mode' switch and wrapping
/// around back to 'mode 0' when the switch
/// goes past the end of the available range.
/// e.g. if you have seven modes, this switches
/// to the next one and wraps around if needed:
///   mode = addmod8( mode, 1, 7);
/// See 'mod8' for notes on performance.
LIB8STATIC uint8_t addmod8( uint8_t a, uint8_t b, uint8_t m)
{
#if defined(__AVR__)
    asm volatile (
        "       add %[a],%[b]    \n\t"
        "L_%=:  sub %[a],%[m]    \n\t"
        "       brcc L_%=        \n\t"
        "       add %[a],%[m]    \n\t"
        : [a] "+r" (a)
        : [b] "r" (b), [m] "r" (m)
        );
#else
    a += b;
    while( a >= m) a -= m;
#endif
    return a;
}
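
// Worked example (illustrative comment, not part of the original source):
//   with seven modes (0..6), addmod8( 6, 1, 7) == 0, wrapping back to mode 0.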

/// Subtract two numbers, and calculate the modulo
/// of the difference and a third number, M.
/// In other words, it returns (A-B) % M.
/// It is designed as a compact mechanism for
/// decrementing a 'mode' switch and keeping the
/// result within the available range, e.g.:
///   mode = submod8( mode, 1, 7);
/// See 'mod8' for notes on performance.
LIB8STATIC uint8_t submod8( uint8_t a, uint8_t b, uint8_t m)
{
#if defined(__AVR__)
    asm volatile (
        "       sub %[a],%[b]    \n\t"
        "L_%=:  sub %[a],%[m]    \n\t"
        "       brcc L_%=        \n\t"
        "       add %[a],%[m]    \n\t"
        : [a] "+r" (a)
        : [b] "r" (b), [m] "r" (m)
        );
#else
    a -= b;
    while( a >= m) a -= m;
#endif
    return a;
}
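
// Worked example (illustrative comment, not part of the original source):
//   submod8( 9, 1, 7) == 1, i.e. (9 - 1) % 7.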

/// 8x8 bit multiplication, with 8 bit result
LIB8STATIC_ALWAYS_INLINE uint8_t mul8( uint8_t i, uint8_t j)
{
#if MUL8_C == 1
    return ((uint16_t)i * (uint16_t)(j) ) & 0xFF;
#elif MUL8_AVRASM == 1
    asm volatile(
        /* Multiply 8-bit i * 8-bit j, giving 16-bit r1,r0 */
        "mul %0, %1          \n\t"
        /* Extract the LOW 8-bits (r0) */
        "mov %0, r0          \n\t"
        /* Restore r1 to "0"; it's expected to always be that */
        "clr __zero_reg__    \n\t"
        : "+a" (i)
        : "a" (j)
        : "r0", "r1");

    return i;
#else
#error "No implementation for mul8 available."
#endif
}
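
// Worked example (illustrative comment, not part of the original source):
//   mul8( 20, 20) == 144;  the full product, 400 (0x0190), keeps only its low byte (0x90).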


/// saturating 8x8 bit multiplication, with 8 bit result
/// @returns the product of i * j, capping at 0xFF
LIB8STATIC_ALWAYS_INLINE uint8_t qmul8( uint8_t i, uint8_t j)
{
#if QMUL8_C == 1
    int p = ((uint16_t)i * (uint16_t)(j) );
    if( p > 255) p = 255;
    return p;
#elif QMUL8_AVRASM == 1
    asm volatile(
        /* Multiply 8-bit i * 8-bit j, giving 16-bit r1,r0 */
        "   mul %0, %1          \n\t"
        /* If high byte of result is zero, all is well. */
        "   tst r1              \n\t"
        "   breq Lnospill_%=    \n\t"
        /* If high byte of result > 0, saturate low byte to 0xFF */
        "   ldi %0,0xFF         \n\t"
        "   rjmp Ldone_%=       \n\t"
        "Lnospill_%=:           \n\t"
        /* Extract the LOW 8-bits (r0) */
        "   mov %0, r0          \n\t"
        "Ldone_%=:              \n\t"
        /* Restore r1 to "0"; it's expected to always be that */
        "   clr __zero_reg__    \n\t"
        : "+a" (i)
        : "a" (j)
        : "r0", "r1");

    return i;
#else
#error "No implementation for qmul8 available."
#endif
}
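
// Worked example (illustrative comment, not part of the original source):
//   qmul8( 20, 20) == 255;  the full product, 400, is capped at 0xFF,
//   while qmul8( 10, 20) == 200 is returned unchanged.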


/// take abs() of a signed 8-bit int8_t
LIB8STATIC_ALWAYS_INLINE int8_t abs8( int8_t i)
{
#if ABS8_C == 1
    if( i < 0) i = -i;
    return i;
#elif ABS8_AVRASM == 1


    asm volatile(
        /* First, check the high bit, and prepare to skip if it's clear */
        "sbrc %0, 7 \n"

        /* Negate the value */
        "neg %0     \n"

        : "+r" (i) : "r" (i) );
    return i;
#else
#error "No implementation for abs8 available."
#endif
}
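
// Worked example (illustrative comment, not part of the original source):
//   abs8( -42) == 42.  Note that -128 has no positive counterpart in int8_t,
//   so abs8( -128) cannot produce a correct result.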

/// square root for 16-bit integers
/// About three times faster and five times smaller
/// than Arduino's general sqrt on AVR.
LIB8STATIC uint8_t sqrt16(uint16_t x)
{
    if( x <= 1) {
        return x;
    }

    uint8_t low = 1; // lower bound
    uint8_t hi, mid;

    if( x > 7904) {
        hi = 255;
    } else {
        hi = (x >> 5) + 8; // initial estimate for upper bound
    }

    do {
        mid = (low + hi) >> 1;
        if ((uint16_t)(mid * mid) > x) {
            hi = mid - 1;
        } else {
            if( mid == 255) {
                return 255;
            }
            low = mid + 1;
        }
    } while (hi >= low);

    return low - 1;
}
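
// Worked example (illustrative comment, not part of the original source):
//   sqrt16( 100) == 10, and sqrt16( 99) == 9;  fractional results round down.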

/// blend a variable proportion (0-255) of one byte to another
/// @param a - the starting byte value
/// @param b - the byte value to blend toward
/// @param amountOfB - the proportion (0-255) of b to blend
/// @returns a byte value between a and b, inclusive
#if (FASTLED_BLEND_FIXED == 1)
LIB8STATIC uint8_t blend8( uint8_t a, uint8_t b, uint8_t amountOfB)
{
#if BLEND8_C == 1
    uint16_t partial;
    uint8_t result;

    uint8_t amountOfA = 255 - amountOfB;

    partial = (a * amountOfA);
#if (FASTLED_SCALE8_FIXED == 1)
    partial += a;
    //partial = add8to16( a, partial);
#endif

    partial += (b * amountOfB);
#if (FASTLED_SCALE8_FIXED == 1)
    partial += b;
    //partial = add8to16( b, partial);
#endif

    result = partial >> 8;

    return result;

#elif BLEND8_AVRASM == 1
    uint16_t partial;
    uint8_t result;

    asm volatile (
        /* partial = b * amountOfB */
        "  mul %[b], %[amountOfB]        \n\t"
        "  movw %A[partial], r0          \n\t"

        /* amountOfB (aka amountOfA) = 255 - amountOfB */
        "  com %[amountOfB]              \n\t"

        /* partial += a * amountOfB (aka amountOfA) */
        "  mul %[a], %[amountOfB]        \n\t"

        "  add %A[partial], r0           \n\t"
        "  adc %B[partial], r1           \n\t"

        "  clr __zero_reg__              \n\t"

#if (FASTLED_SCALE8_FIXED == 1)
        /* partial += a */
        "  add %A[partial], %[a]         \n\t"
        "  adc %B[partial], __zero_reg__ \n\t"

        // partial += b
        "  add %A[partial], %[b]         \n\t"
        "  adc %B[partial], __zero_reg__ \n\t"
#endif

        : [partial] "=r" (partial),
          [amountOfB] "+a" (amountOfB)
        : [a] "a" (a),
          [b] "a" (b)
        : "r0", "r1"
        );

    result = partial >> 8;

    return result;

#else
#error "No implementation for blend8 available."
#endif
}

#else
LIB8STATIC uint8_t blend8( uint8_t a, uint8_t b, uint8_t amountOfB)
{
    // This version loses precision in the integer math
    // and can actually return results outside of the range
    // from a to b. Its use is not recommended.
    uint8_t result;
    uint8_t amountOfA = 255 - amountOfB;
    result = scale8_LEAVING_R1_DIRTY( a, amountOfA)
           + scale8_LEAVING_R1_DIRTY( b, amountOfB);
    cleanup_R1();
    return result;
}
#endif
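
// Worked example (illustrative comment, not part of the original source), assuming the
// FASTLED_BLEND_FIXED and FASTLED_SCALE8_FIXED code paths are the ones compiled in:
//   blend8( 0, 255, 128) == 128, blend8( a, b, 0) == a, and blend8( a, b, 255) == b.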


///@}
#endif