aboutsummaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
Diffstat (limited to 'lib')
-rw-r--r--lib/lib8tion/LICENSE20
-rw-r--r--lib/lib8tion/lib8tion.c242
-rw-r--r--lib/lib8tion/lib8tion.h934
-rw-r--r--lib/lib8tion/math8.h552
-rw-r--r--lib/lib8tion/random8.h94
-rw-r--r--lib/lib8tion/scale8.h542
-rw-r--r--lib/lib8tion/trig8.h259
7 files changed, 2643 insertions, 0 deletions
diff --git a/lib/lib8tion/LICENSE b/lib/lib8tion/LICENSE
new file mode 100644
index 000000000..ebe476330
--- /dev/null
+++ b/lib/lib8tion/LICENSE
@@ -0,0 +1,20 @@
1The MIT License (MIT)
2
3Copyright (c) 2013 FastLED
4
5Permission is hereby granted, free of charge, to any person obtaining a copy of
6this software and associated documentation files (the "Software"), to deal in
7the Software without restriction, including without limitation the rights to
8use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
9the Software, and to permit persons to whom the Software is furnished to do so,
10subject to the following conditions:
11
12The above copyright notice and this permission notice shall be included in all
13copies or substantial portions of the Software.
14
15THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
17FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
18COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
19IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
diff --git a/lib/lib8tion/lib8tion.c b/lib/lib8tion/lib8tion.c
new file mode 100644
index 000000000..84b3e9c61
--- /dev/null
+++ b/lib/lib8tion/lib8tion.c
@@ -0,0 +1,242 @@
1#define FASTLED_INTERNAL
2#include <stdint.h>
3
// Initial value given to the 16-bit random seed below.
4#define RAND16_SEED 1337
// Global 16-bit seed, starting at RAND16_SEED.
// Presumably the state used by the random8/random16 helpers declared in
// random8.h -- TODO confirm against that header.
5uint16_t rand16seed = RAND16_SEED;
6
7
8// memset8, memcpy8, memmove8:
9// optimized avr replacements for the standard "C" library
10// routines memset, memcpy, and memmove.
11//
12// There are two techniques that make these routines
13// faster than the standard avr-libc routines.
14// First, the loops are unrolled 2X, meaning that
15// the average loop overhead is cut in half.
16// And second, the compare-and-branch at the bottom
17// of each loop decrements the low byte of the
18// counter, and if the carry is clear, it branches
19// back up immediately. Only if the low byte math
20// causes carry do we bother to decrement the high
21// byte and check that result for carry as well.
22// Results for a 100-byte buffer are 20-40% faster
23// than standard avr-libc, at a cost of a few extra
24// bytes of code.
25
#if defined(__AVR__)

/// memset8: store `val` into each of the `num` bytes starting at `ptr`.
///
/// The store loop is unrolled 2X; an odd `num` enters the loop in the
/// middle so that the remaining count is always even.
///
/// @param ptr  first byte to write
/// @param val  byte value to store
/// @param num  number of bytes to write (0 writes nothing)
/// @returns    `ptr`
void * memset8 ( void * ptr, uint8_t val, uint16_t num )
{
    // The asm loads the X pointer (r26:r27) itself, so those registers are
    // listed as clobbers; otherwise the compiler may keep an operand or a
    // live value in them.  `num` uses the "d" constraint because SUBI/SBCI
    // only accept r16..r31.
    asm volatile(
        " movw r26, %[ptr] \n\t"
        " sbrs %A[num], 0 \n\t"
        " rjmp Lseteven_%= \n\t"
        " rjmp Lsetodd_%= \n\t"
        "Lsetloop_%=: \n\t"
        " st X+, %[val] \n\t"
        "Lsetodd_%=: \n\t"
        " st X+, %[val] \n\t"
        "Lseteven_%=: \n\t"
        " subi %A[num], 2 \n\t"
        " brcc Lsetloop_%= \n\t"
        " sbci %B[num], 0 \n\t"
        " brcc Lsetloop_%= \n\t"
        : [num] "+d" (num)
        : [ptr] "r" (ptr),
          [val] "r" (val)
        : "memory", "r26", "r27"
        );
    return ptr;
}


/// memcpy8: copy `num` bytes from `src` to `dst`, stepping forward.
///
/// Same 2X-unrolled loop structure as memset8, reading through Z
/// (r30:r31) and writing through X (r26:r27).
///
/// @param dst  first byte to write
/// @param src  first byte to read
/// @param num  number of bytes to copy (0 copies nothing)
/// @returns    `dst`
void * memcpy8 ( void * dst, const void* src, uint16_t num )
{
    // X and Z are loaded by the asm itself and are therefore declared as
    // clobbers; this also keeps `dst` out of r30:r31, which the first MOVW
    // overwrites before `dst` is read.
    asm volatile(
        " movw r30, %[src] \n\t"
        " movw r26, %[dst] \n\t"
        " sbrs %A[num], 0 \n\t"
        " rjmp Lcpyeven_%= \n\t"
        " rjmp Lcpyodd_%= \n\t"
        "Lcpyloop_%=: \n\t"
        " ld __tmp_reg__, Z+ \n\t"
        " st X+, __tmp_reg__ \n\t"
        "Lcpyodd_%=: \n\t"
        " ld __tmp_reg__, Z+ \n\t"
        " st X+, __tmp_reg__ \n\t"
        "Lcpyeven_%=: \n\t"
        " subi %A[num], 2 \n\t"
        " brcc Lcpyloop_%= \n\t"
        " sbci %B[num], 0 \n\t"
        " brcc Lcpyloop_%= \n\t"
        : [num] "+d" (num)
        : [src] "r" (src),
          [dst] "r" (dst)
        : "memory", "r26", "r27", "r30", "r31"
        );
    return dst;
}

/// memmove8: copy `num` bytes from `src` to `dst`; the regions may overlap.
///
/// When `src` is above `dst` the forward-stepping memcpy8 is used.
/// Otherwise the copy runs backward from the end of both regions, using
/// pre-decrement loads and stores.
///
/// @param dst  first byte of the destination region
/// @param src  first byte of the source region
/// @param num  number of bytes to move (0 moves nothing)
/// @returns    `dst` (the start of the destination, on both paths)
void * memmove8 ( void * dst, const void* src, uint16_t num )
{
    if( src > dst) {
        // if src > dst then we can use the forward-stepping memcpy8
        return memcpy8( dst, src, num);
    }

    // if src <= dst then we have to step backward, starting one past the
    // end of each region.  Separate cursors are used so that the original
    // `dst` is still available as the return value.
    char * dst_end = (char*)dst + num;
    const char * src_end = (const char*)src + num;
    asm volatile(
        " movw r30, %[src] \n\t"
        " movw r26, %[dst] \n\t"
        " sbrs %A[num], 0 \n\t"
        " rjmp Lmoveven_%= \n\t"
        " rjmp Lmovodd_%= \n\t"
        "Lmovloop_%=: \n\t"
        " ld __tmp_reg__, -Z \n\t"
        " st -X, __tmp_reg__ \n\t"
        "Lmovodd_%=: \n\t"
        " ld __tmp_reg__, -Z \n\t"
        " st -X, __tmp_reg__ \n\t"
        "Lmoveven_%=: \n\t"
        " subi %A[num], 2 \n\t"
        " brcc Lmovloop_%= \n\t"
        " sbci %B[num], 0 \n\t"
        " brcc Lmovloop_%= \n\t"
        : [num] "+d" (num)
        : [src] "r" (src_end),
          [dst] "r" (dst_end)
        : "memory", "r26", "r27", "r30", "r31"
        );
    return dst;
}

#endif /* AVR */
119
120
121
122
123#if 0
124// TEST / VERIFICATION CODE ONLY BELOW THIS POINT
125#include <Arduino.h>
126#include "lib8tion.h"
127
128void test1abs( int8_t i)
129{
130 Serial.print("abs("); Serial.print(i); Serial.print(") = ");
131 int8_t j = abs8(i);
132 Serial.print(j); Serial.println(" ");
133}
134
135void testabs()
136{
137 delay(5000);
138 for( int8_t q = -128; q != 127; q++) {
139 test1abs(q);
140 }
141 for(;;){};
142}
143
144
145void testmul8()
146{
147 delay(5000);
148 byte r, c;
149
150 Serial.println("mul8:");
151 for( r = 0; r <= 20; r += 1) {
152 Serial.print(r); Serial.print(" : ");
153 for( c = 0; c <= 20; c += 1) {
154 byte t;
155 t = mul8( r, c);
156 Serial.print(t); Serial.print(' ');
157 }
158 Serial.println(' ');
159 }
160 Serial.println("done.");
161 for(;;){};
162}
163
164
165void testscale8()
166{
167 delay(5000);
168 byte r, c;
169
170 Serial.println("scale8:");
171 for( r = 0; r <= 240; r += 10) {
172 Serial.print(r); Serial.print(" : ");
173 for( c = 0; c <= 240; c += 10) {
174 byte t;
175 t = scale8( r, c);
176 Serial.print(t); Serial.print(' ');
177 }
178 Serial.println(' ');
179 }
180
181 Serial.println(' ');
182 Serial.println("scale8_video:");
183
184 for( r = 0; r <= 100; r += 4) {
185 Serial.print(r); Serial.print(" : ");
186 for( c = 0; c <= 100; c += 4) {
187 byte t;
188 t = scale8_video( r, c);
189 Serial.print(t); Serial.print(' ');
190 }
191 Serial.println(' ');
192 }
193
194 Serial.println("done.");
195 for(;;){};
196}
197
198
199
200void testqadd8()
201{
202 delay(5000);
203 byte r, c;
204 for( r = 0; r <= 240; r += 10) {
205 Serial.print(r); Serial.print(" : ");
206 for( c = 0; c <= 240; c += 10) {
207 byte t;
208 t = qadd8( r, c);
209 Serial.print(t); Serial.print(' ');
210 }
211 Serial.println(' ');
212 }
213 Serial.println("done.");
214 for(;;){};
215}
216
217void testnscale8x3()
218{
219 delay(5000);
220 byte r, g, b, sc;
221 for( byte z = 0; z < 10; z++) {
222 r = random8(); g = random8(); b = random8(); sc = random8();
223
224 Serial.print("nscale8x3_video( ");
225 Serial.print(r); Serial.print(", ");
226 Serial.print(g); Serial.print(", ");
227 Serial.print(b); Serial.print(", ");
228 Serial.print(sc); Serial.print(") = [ ");
229
230 nscale8x3_video( r, g, b, sc);
231
232 Serial.print(r); Serial.print(", ");
233 Serial.print(g); Serial.print(", ");
234 Serial.print(b); Serial.print("]");
235
236 Serial.println(' ');
237 }
238 Serial.println("done.");
239 for(;;){};
240}
241
242#endif
diff --git a/lib/lib8tion/lib8tion.h b/lib/lib8tion/lib8tion.h
new file mode 100644
index 000000000..d93c748e6
--- /dev/null
+++ b/lib/lib8tion/lib8tion.h
@@ -0,0 +1,934 @@
1#ifndef __INC_LIB8TION_H
2#define __INC_LIB8TION_H
3
4/*
5
6 Fast, efficient 8-bit math functions specifically
7 designed for high-performance LED programming.
8
9 Because of the AVR(Arduino) and ARM assembly language
10 implementations provided, using these functions often
11 results in smaller and faster code than the equivalent
12 program using plain "C" arithmetic and logic.
13
14
15 Included are:
16
17
18 - Saturating unsigned 8-bit add and subtract.
19 Instead of wrapping around if an overflow occurs,
20 these routines just 'clamp' the output at a maximum
21 of 255, or a minimum of 0. Useful for adding pixel
22 values. E.g., qadd8( 200, 100) = 255.
23
24 qadd8( i, j) == MIN( (i + j), 0xFF )
25 qsub8( i, j) == MAX( (i - j), 0 )
26
27 - Saturating signed 8-bit ("7-bit") add.
28 qadd7( i, j) == MIN( (i + j), 0x7F)
29
30
31 - Scaling (down) of unsigned 8- and 16- bit values.
32 Scaledown value is specified in 1/256ths.
33 scale8( i, sc) == (i * sc) / 256
34 scale16by8( i, sc) == (i * sc) / 256
35
36 Example: scaling a 0-255 value down into a
37 range from 0-99:
38 downscaled = scale8( originalnumber, 100);
39
40 A special version of scale8 is provided for scaling
41 LED brightness values, to make sure that they don't
42 accidentally scale down to total black at low
43 dimming levels, since that would look wrong:
44 scale8_video( i, sc) = ((i * sc) / 256) +? 1
45
46 Example: reducing an LED brightness by a
47 dimming factor:
48 new_bright = scale8_video( orig_bright, dimming);
49
50
51 - Fast 8- and 16- bit unsigned random numbers.
52 Significantly faster than Arduino random(), but
53 also somewhat less random. You can add entropy.
54 random8() == random from 0..255
55 random8( n) == random from 0..(N-1)
56 random8( n, m) == random from N..(M-1)
57
58 random16() == random from 0..65535
59 random16( n) == random from 0..(N-1)
60 random16( n, m) == random from N..(M-1)
61
62 random16_set_seed( k) == seed = k
63 random16_add_entropy( k) == seed += k
64
65
66 - Absolute value of a signed 8-bit value.
67 abs8( i) == abs( i)
68
69
70 - 8-bit math operations which return 8-bit values.
71 These are provided mostly for completeness,
72 not particularly for performance.
73 mul8( i, j) == (i * j) & 0xFF
74 add8( i, j) == (i + j) & 0xFF
75 sub8( i, j) == (i - j) & 0xFF
76
77
78 - Fast 16-bit approximations of sin and cos.
79 Input angle is a uint16_t from 0-65535.
80 Output is a signed int16_t from -32767 to 32767.
81 sin16( x) == sin( (x/32768.0) * pi) * 32767
82 cos16( x) == cos( (x/32768.0) * pi) * 32767
83 Accurate to more than 99% in all cases.
84
85 - Fast 8-bit approximations of sin and cos.
86 Input angle is a uint8_t from 0-255.
87 Output is an UNsigned uint8_t from 0 to 255.
88 sin8( x) == (sin( (x/128.0) * pi) * 128) + 128
89 cos8( x) == (cos( (x/128.0) * pi) * 128) + 128
90 Accurate to within about 2%.
91
92
93 - Fast 8-bit "easing in/out" function.
94 ease8InOutCubic(x) == 3(x^2) - 2(x^3)
95 ease8InOutApprox(x) ==
96 faster, rougher, approximation of cubic easing
97 ease8InOutQuad(x) == quadratic (vs cubic) easing
98
99 - Cubic, Quadratic, and Triangle wave functions.
100 Input is a uint8_t representing phase within the wave,
101 similar to how sin8 takes an angle 'theta'.
102 Output is a uint8_t representing the amplitude of
103 the wave at that point.
104 cubicwave8( x)
105 quadwave8( x)
106 triwave8( x)
107
108 - Square root for 16-bit integers. About three times
109 faster and five times smaller than Arduino's built-in
110 generic 32-bit sqrt routine.
111 sqrt16( uint16_t x ) == sqrt( x)
112
113 - Dimming and brightening functions for 8-bit
114 light values.
115 dim8_video( x) == scale8_video( x, x)
116 dim8_raw( x) == scale8( x, x)
117 dim8_lin( x) == (x<128) ? ((x+1)/2) : scale8(x,x)
118 brighten8_video( x) == 255 - dim8_video( 255 - x)
119 brighten8_raw( x) == 255 - dim8_raw( 255 - x)
120 brighten8_lin( x) == 255 - dim8_lin( 255 - x)
121 The dimming functions in particular are suitable
122 for making LED light output appear more 'linear'.
123
124
125 - Linear interpolation between two values, with the
126 fraction between them expressed as an 8- or 16-bit
127 fixed point fraction (fract8 or fract16).
128 lerp8by8( fromU8, toU8, fract8 )
129 lerp16by8( fromU16, toU16, fract8 )
130 lerp15by8( fromS16, toS16, fract8 )
131 == from + (( to - from ) * fract8) / 256)
132 lerp16by16( fromU16, toU16, fract16 )
133 == from + (( to - from ) * fract16) / 65536)
134 map8( in, rangeStart, rangeEnd)
135 == map( in, 0, 255, rangeStart, rangeEnd);
136
137 - Optimized memmove, memcpy, and memset, that are
138 faster than standard avr-libc 1.8.
139 memmove8( dest, src, bytecount)
140 memcpy8( dest, src, bytecount)
141 memset8( buf, value, bytecount)
142
143 - Beat generators which return sine or sawtooth
144 waves in a specified number of Beats Per Minute.
145 Sine wave beat generators can specify a low and
146 high range for the output. Sawtooth wave beat
147 generators always range 0-255 or 0-65535.
148 beatsin8( BPM, low8, high8)
149 = (sine(beatphase) * (high8-low8)) + low8
150 beatsin16( BPM, low16, high16)
151 = (sine(beatphase) * (high16-low16)) + low16
152 beatsin88( BPM88, low16, high16)
153 = (sine(beatphase) * (high16-low16)) + low16
154 beat8( BPM) = 8-bit repeating sawtooth wave
155 beat16( BPM) = 16-bit repeating sawtooth wave
156 beat88( BPM88) = 16-bit repeating sawtooth wave
157 BPM is beats per minute in either simple form
158 e.g. 120, or Q8.8 fixed-point form.
159 BPM88 is beats per minute in ONLY Q8.8 fixed-point
160 form.
161
162Lib8tion is pronounced like 'libation': lie-BAY-shun
163
164*/
165
166
167
168#include <stdint.h>
169
170#define LIB8STATIC __attribute__ ((unused)) static inline
171#define LIB8STATIC_ALWAYS_INLINE __attribute__ ((always_inline)) static inline
172
173#if !defined(__AVR__)
174#include <string.h>
175// for memmove, memcpy, and memset if not defined here
176#endif
177
178#if defined(__arm__)
179
180#if defined(FASTLED_TEENSY3)
181// Can use Cortex M4 DSP instructions
182#define QADD8_C 0
183#define QADD7_C 0
184#define QADD8_ARM_DSP_ASM 1
185#define QADD7_ARM_DSP_ASM 1
186#else
187// Generic ARM
188#define QADD8_C 1
189#define QADD7_C 1
190#endif
191
192#define QSUB8_C 1
193#define SCALE8_C 1
194#define SCALE16BY8_C 1
195#define SCALE16_C 1
196#define ABS8_C 1
197#define MUL8_C 1
198#define QMUL8_C 1
199#define ADD8_C 1
200#define SUB8_C 1
201#define EASE8_C 1
202#define AVG8_C 1
203#define AVG7_C 1
204#define AVG16_C 1
205#define AVG15_C 1
206#define BLEND8_C 1
207
208
209#elif defined(__AVR__)
210
211// AVR ATmega and friends Arduino
212
213#define QADD8_C 0
214#define QADD7_C 0
215#define QSUB8_C 0
216#define ABS8_C 0
217#define ADD8_C 0
218#define SUB8_C 0
219#define AVG8_C 0
220#define AVG7_C 0
221#define AVG16_C 0
222#define AVG15_C 0
223
224#define QADD8_AVRASM 1
225#define QADD7_AVRASM 1
226#define QSUB8_AVRASM 1
227#define ABS8_AVRASM 1
228#define ADD8_AVRASM 1
229#define SUB8_AVRASM 1
230#define AVG8_AVRASM 1
231#define AVG7_AVRASM 1
232#define AVG16_AVRASM 1
233#define AVG15_AVRASM 1
234
235// Note: these require hardware MUL instruction
236// -- sorry, ATtiny!
237#if !defined(LIB8_ATTINY)
238#define SCALE8_C 0
239#define SCALE16BY8_C 0
240#define SCALE16_C 0
241#define MUL8_C 0
242#define QMUL8_C 0
243#define EASE8_C 0
244#define BLEND8_C 0
245#define SCALE8_AVRASM 1
246#define SCALE16BY8_AVRASM 1
247#define SCALE16_AVRASM 1
248#define MUL8_AVRASM 1
249#define QMUL8_AVRASM 1
250#define EASE8_AVRASM 1
251#define CLEANUP_R1_AVRASM 1
252#define BLEND8_AVRASM 1
253#else
254// On ATtiny, we just use C implementations
255#define SCALE8_C 1
256#define SCALE16BY8_C 1
257#define SCALE16_C 1
258#define MUL8_C 1
259#define QMUL8_C 1
260#define EASE8_C 1
261#define BLEND8_C 1
262#define SCALE8_AVRASM 0
263#define SCALE16BY8_AVRASM 0
264#define SCALE16_AVRASM 0
265#define MUL8_AVRASM 0
266#define QMUL8_AVRASM 0
267#define EASE8_AVRASM 0
268#define BLEND8_AVRASM 0
269#endif
270
271#else
272
273// unspecified architecture, so
274// no ASM, everything in C
275#define QADD8_C 1
276#define QADD7_C 1
277#define QSUB8_C 1
278#define SCALE8_C 1
279#define SCALE16BY8_C 1
280#define SCALE16_C 1
281#define ABS8_C 1
282#define MUL8_C 1
283#define QMUL8_C 1
284#define ADD8_C 1
285#define SUB8_C 1
286#define EASE8_C 1
287#define AVG8_C 1
288#define AVG7_C 1
289#define AVG16_C 1
290#define AVG15_C 1
291#define BLEND8_C 1
292
293#endif
294
295///@defgroup lib8tion Fast math functions
296///A variety of functions for working with numbers.
297///@{
298
299
300///////////////////////////////////////////////////////////////////////
301//
302// typedefs for fixed-point fractional types.
303//
304// sfract7 should be interpreted as signed 128ths.
305// fract8 should be interpreted as unsigned 256ths.
306// sfract15 should be interpreted as signed 32768ths.
307// fract16 should be interpreted as unsigned 65536ths.
308//
309// Example: if a fract8 has the value "64", that should be interpreted
310// as 64/256ths, or one-quarter.
311//
312//
313// fract8 range is 0 to 0.99609375
314// in steps of 0.00390625
315//
316// sfract7 range is -0.9921875 to 0.9921875
317// in steps of 0.0078125
318//
319// fract16 range is 0 to 0.99998474121
320// in steps of 0.00001525878
321//
322// sfract15 range is -0.99996948242 to 0.99996948242
323// in steps of 0.00003051757
324//
325
326/// ANSI unsigned short _Fract. range is 0 to 0.99609375
327/// in steps of 0.00390625
328typedef uint8_t fract8; ///< ANSI: unsigned short _Fract
329
330/// ANSI: signed short _Fract. range is -0.9921875 to 0.9921875
331/// in steps of 0.0078125
332typedef int8_t sfract7; ///< ANSI: signed short _Fract
333
334/// ANSI: unsigned _Fract. range is 0 to 0.99998474121
335/// in steps of 0.00001525878
336typedef uint16_t fract16; ///< ANSI: unsigned _Fract
337
338/// ANSI: signed _Fract. range is -0.99996948242 to 0.99996948242
339/// in steps of 0.00003051757
340typedef int16_t sfract15; ///< ANSI: signed _Fract
341
342
343// accumXY types should be interpreted as X bits of integer,
344// and Y bits of fraction.
345// E.g., accum88 has 8 bits of int, 8 bits of fraction
346
347typedef uint16_t accum88; ///< ANSI: unsigned short _Accum. 8 bits int, 8 bits fraction
348typedef int16_t saccum78; ///< ANSI: signed short _Accum. 7 bits int, 8 bits fraction
349typedef uint32_t accum1616;///< ANSI: unsigned _Accum. 16 bits int, 16 bits fraction
350typedef int32_t saccum1516;///< ANSI: signed _Accum. 15 bits int, 16 bits fraction
351typedef uint16_t accum124; ///< no direct ANSI counterpart. 12 bits int, 4 bits fraction
352typedef int32_t saccum114;///< no direct ANSI counterpart. 1 bit int, 14 bits fraction. NOTE(review): sign + 1 + 14 bits would fit in 16 bits, yet the storage type is int32_t -- confirm this width is intended.
353
354
355
356#include "math8.h"
357#include "scale8.h"
358#include "random8.h"
359#include "trig8.h"
360
361///////////////////////////////////////////////////////////////////////
362
363
364
365
366
367
368
369///////////////////////////////////////////////////////////////////////
370//
371// float-to-fixed and fixed-to-float conversions
372//
373// Note that anything involving a 'float' on AVR will be slower.
374
375/// sfract15ToFloat: conversion from sfract15 fixed point to
376/// IEEE754 32-bit float.
377LIB8STATIC float sfract15ToFloat( sfract15 y)
378{
379 return y / 32768.0;
380}
381
382/// conversion from IEEE754 float in the range (-1,1)
383/// to 16-bit fixed point. Note that the extremes of
384/// one and negative one are NOT representable. The
385/// representable range is basically
386LIB8STATIC sfract15 floatToSfract15( float f)
387{
388 return f * 32768.0;
389}
390
391
392
393///////////////////////////////////////////////////////////////////////
394//
395// memmove8, memcpy8, and memset8:
396// alternatives to memmove, memcpy, and memset that are
397// faster on AVR than standard avr-libc 1.8
398
399#if defined(__AVR__)
400void * memmove8( void * dst, const void * src, uint16_t num );
401void * memcpy8 ( void * dst, const void * src, uint16_t num ) __attribute__ ((noinline));
402void * memset8 ( void * ptr, uint8_t value, uint16_t num ) __attribute__ ((noinline)) ;
403#else
404// on non-AVR platforms, these names just call standard libc.
405#define memmove8 memmove
406#define memcpy8 memcpy
407#define memset8 memset
408#endif
409
410
411///////////////////////////////////////////////////////////////////////
412//
413// linear interpolation, such as could be used for Perlin noise, etc.
414//
415
416// A note on the structure of the lerp functions:
417// The cases for b>a and b<=a are handled separately for
418// speed: without knowing the relative order of a and b,
419// the value (a-b) might overflow the width of a or b,
420// and have to be promoted to a wider, slower type.
421// To avoid that, we separate the two cases, and are able
422// to do all the math in the same width as the arguments,
423// which is much faster and smaller on AVR.
424
425/// linear interpolation between two unsigned 8-bit values,
426/// with 8-bit fraction
427LIB8STATIC uint8_t lerp8by8( uint8_t a, uint8_t b, fract8 frac)
428{
429 uint8_t result;
430 if( b > a) {
431 uint8_t delta = b - a;
432 uint8_t scaled = scale8( delta, frac);
433 result = a + scaled;
434 } else {
435 uint8_t delta = a - b;
436 uint8_t scaled = scale8( delta, frac);
437 result = a - scaled;
438 }
439 return result;
440}
441
442/// linear interpolation between two unsigned 16-bit values,
443/// with 16-bit fraction
444LIB8STATIC uint16_t lerp16by16( uint16_t a, uint16_t b, fract16 frac)
445{
446 uint16_t result;
447 if( b > a ) {
448 uint16_t delta = b - a;
449 uint16_t scaled = scale16(delta, frac);
450 result = a + scaled;
451 } else {
452 uint16_t delta = a - b;
453 uint16_t scaled = scale16( delta, frac);
454 result = a - scaled;
455 }
456 return result;
457}
458
459/// linear interpolation between two unsigned 16-bit values,
460/// with 8-bit fraction
461LIB8STATIC uint16_t lerp16by8( uint16_t a, uint16_t b, fract8 frac)
462{
463 uint16_t result;
464 if( b > a) {
465 uint16_t delta = b - a;
466 uint16_t scaled = scale16by8( delta, frac);
467 result = a + scaled;
468 } else {
469 uint16_t delta = a - b;
470 uint16_t scaled = scale16by8( delta, frac);
471 result = a - scaled;
472 }
473 return result;
474}
475
476/// linear interpolation between two signed 15-bit values,
477/// with 8-bit fraction
478LIB8STATIC int16_t lerp15by8( int16_t a, int16_t b, fract8 frac)
479{
480 int16_t result;
481 if( b > a) {
482 uint16_t delta = b - a;
483 uint16_t scaled = scale16by8( delta, frac);
484 result = a + scaled;
485 } else {
486 uint16_t delta = a - b;
487 uint16_t scaled = scale16by8( delta, frac);
488 result = a - scaled;
489 }
490 return result;
491}
492
493/// linear interpolation between two signed 15-bit values,
494/// with 8-bit fraction
495LIB8STATIC int16_t lerp15by16( int16_t a, int16_t b, fract16 frac)
496{
497 int16_t result;
498 if( b > a) {
499 uint16_t delta = b - a;
500 uint16_t scaled = scale16( delta, frac);
501 result = a + scaled;
502 } else {
503 uint16_t delta = a - b;
504 uint16_t scaled = scale16( delta, frac);
505 result = a - scaled;
506 }
507 return result;
508}
509
510/// map8: map from one full-range 8-bit value into a narrower
511/// range of 8-bit values, possibly a range of hues.
512///
513/// E.g. map myValue into a hue in the range blue..purple..pink..red
514/// hue = map8( myValue, HUE_BLUE, HUE_RED);
515///
516/// Combines nicely with the waveform functions (like sin8, etc)
517/// to produce continuous hue gradients back and forth:
518///
519/// hue = map8( sin8( myValue), HUE_BLUE, HUE_RED);
520///
521/// Mathematically simiar to lerp8by8, but arguments are more
522/// like Arduino's "map"; this function is similar to
523///
524/// map( in, 0, 255, rangeStart, rangeEnd)
525///
526/// but faster and specifically designed for 8-bit values.
527LIB8STATIC uint8_t map8( uint8_t in, uint8_t rangeStart, uint8_t rangeEnd)
528{
529 uint8_t rangeWidth = rangeEnd - rangeStart;
530 uint8_t out = scale8( in, rangeWidth);
531 out += rangeStart;
532 return out;
533}
534
535
536///////////////////////////////////////////////////////////////////////
537//
538// easing functions; see http://easings.net
539//
540
541/// ease8InOutQuad: 8-bit quadratic ease-in / ease-out function
542/// Takes around 13 cycles on AVR
543#if EASE8_C == 1
544LIB8STATIC uint8_t ease8InOutQuad( uint8_t i)
545{
546 uint8_t j = i;
547 if( j & 0x80 ) {
548 j = 255 - j;
549 }
550 uint8_t jj = scale8( j, j);
551 uint8_t jj2 = jj << 1;
552 if( i & 0x80 ) {
553 jj2 = 255 - jj2;
554 }
555 return jj2;
556}
557
558#elif EASE8_AVRASM == 1
559// This AVR asm version of ease8InOutQuad preserves one more
560// low-bit of precision than the C version, and is also slightly
561// smaller and faster.
562LIB8STATIC uint8_t ease8InOutQuad(uint8_t val) {
563 uint8_t j=val;
564 asm volatile (
565 "sbrc %[val], 7 \n"
566 "com %[j] \n"
567 "mul %[j], %[j] \n"
568 "add r0, %[j] \n"
569 "ldi %[j], 0 \n"
570 "adc %[j], r1 \n"
571 "lsl r0 \n" // carry = high bit of low byte of mul product
572 "rol %[j] \n" // j = (j * 2) + carry // preserve add'l bit of precision
573 "sbrc %[val], 7 \n"
574 "com %[j] \n"
575 "clr __zero_reg__ \n"
576 : [j] "+&a" (j)
577 : [val] "a" (val)
578 : "r0", "r1"
579 );
580 return j;
581}
582
583#else
584#error "No implementation for ease8InOutQuad available."
585#endif
586
587/// ease16InOutQuad: 16-bit quadratic ease-in / ease-out function
588// C implementation at this point
589LIB8STATIC uint16_t ease16InOutQuad( uint16_t i)
590{
591 uint16_t j = i;
592 if( j & 0x8000 ) {
593 j = 65535 - j;
594 }
595 uint16_t jj = scale16( j, j);
596 uint16_t jj2 = jj << 1;
597 if( i & 0x8000 ) {
598 jj2 = 65535 - jj2;
599 }
600 return jj2;
601}
602
603
604/// ease8InOutCubic: 8-bit cubic ease-in / ease-out function
605/// Takes around 18 cycles on AVR
// Computes 3*(i^2) - 2*(i^3) on 8-bit fractions and clamps the result to 255.
606LIB8STATIC fract8 ease8InOutCubic( fract8 i)
607{
// ii is i scaled by i, iii is ii scaled by i.
// NOTE(review): scale8_LEAVING_R1_DIRTY is defined outside this view;
// presumably a scale8 variant that does not restore AVR's r1 -- confirm.
608 uint8_t ii = scale8_LEAVING_R1_DIRTY( i, i);
609 uint8_t iii = scale8_LEAVING_R1_DIRTY( ii, i);
610
// Evaluated in 16 bits.  Note that 'r1' here is an ordinary local variable
// holding the result; it is not the CPU register of the same name.
611 uint16_t r1 = (3 * (uint16_t)(ii)) - ( 2 * (uint16_t)(iii));
612
613 /* the code generated for the above *'s automatically
614 cleans up R1, so there's no need to explicitily call
615 cleanup_R1(); */
// NOTE(review): the statement above depends on what the compiler emits for
// the two multiplications -- verify on the AVR asm configuration.
616
// Keep the low 8 bits of the 16-bit result.
617 uint8_t result = r1;
618
619 // if we got "256", return 255:
620 if( r1 & 0x100 ) {
621 result = 255;
622 }
623 return result;
624}
625
626/// ease8InOutApprox: fast, rough 8-bit ease-in/ease-out function
627/// shaped approximately like 'ease8InOutCubic',
628/// it's never off by more than a couple of percent
629/// from the actual cubic S-curve, and it executes
630/// more than twice as fast. Use when the cycles
631/// are more important than visual smoothness.
632/// Asm version takes around 7 cycles on AVR.
633
634#if EASE8_C == 1
635LIB8STATIC fract8 ease8InOutApprox( fract8 i)
636{
637 if( i < 64) {
638 // start with slope 0.5
639 i /= 2;
640 } else if( i > (255 - 64)) {
641 // end with slope 0.5
642 i = 255 - i;
643 i /= 2;
644 i = 255 - i;
645 } else {
646 // in the middle, use slope 192/128 = 1.5
647 i -= 64;
648 i += (i / 2);
649 i += 32;
650 }
651
652 return i;
653}
654
655#elif EASE8_AVRASM == 1
656LIB8STATIC uint8_t ease8InOutApprox( fract8 i)
657{
658 // takes around 7 cycles on AVR
659 asm volatile (
660 " subi %[i], 64 \n\t"
661 " cpi %[i], 128 \n\t"
662 " brcc Lshift_%= \n\t"
663
664 // middle case
665 " mov __tmp_reg__, %[i] \n\t"
666 " lsr __tmp_reg__ \n\t"
667 " add %[i], __tmp_reg__ \n\t"
668 " subi %[i], 224 \n\t"
669 " rjmp Ldone_%= \n\t"
670
671 // start or end case
672 "Lshift_%=: \n\t"
673 " lsr %[i] \n\t"
674 " subi %[i], 96 \n\t"
675
676 "Ldone_%=: \n\t"
677
678 : [i] "+&a" (i)
679 :
680 : "r0", "r1"
681 );
682 return i;
683}
684#else
685#error "No implementation for ease8 available."
686#endif
687
688
689
690/// triwave8: triangle (sawtooth) wave generator. Useful for
691/// turning a one-byte ever-increasing value into a
692/// one-byte value that oscillates up and down.
693///
694/// input output
695/// 0..127 0..254 (positive slope)
696/// 128..255 254..0 (negative slope)
697///
698/// On AVR this function takes just three cycles.
699///
700LIB8STATIC uint8_t triwave8(uint8_t in)
701{
702 if( in & 0x80) {
703 in = 255 - in;
704 }
705 uint8_t out = in << 1;
706 return out;
707}
708
709
710// quadwave8 and cubicwave8: S-shaped wave generators (like 'sine').
711// Useful for turning a one-byte 'counter' value into a
712// one-byte oscillating value that moves smoothly up and down,
713// with an 'acceleration' and 'deceleration' curve.
714//
715// These are even faster than 'sin8', and have
716// slightly different curve shapes.
717//
718
719/// quadwave8: quadratic waveform generator. Spends just a little more
720/// time at the limits than 'sine' does.
721LIB8STATIC uint8_t quadwave8(uint8_t in)
722{
723 return ease8InOutQuad( triwave8( in));
724}
725
726/// cubicwave8: cubic waveform generator. Spends visibly more time
727/// at the limits than 'sine' does.
728LIB8STATIC uint8_t cubicwave8(uint8_t in)
729{
730 return ease8InOutCubic( triwave8( in));
731}
732
733/// squarewave8: square wave generator. Useful for
734/// turning a one-byte ever-increasing value
735/// into a one-byte value that is either 0 or 255.
736/// The width of the output 'pulse' is
737/// determined by the pulsewidth argument:
738///
739///~~~
740/// If pulsewidth is 255, output is always 255.
741/// If pulsewidth < 255, then
742/// if input < pulsewidth then output is 255
743/// if input >= pulsewidth then output is 0
744///~~~
745///
746/// the output looking like:
747///
748///~~~
749/// 255 +--pulsewidth--+
750/// . | |
751/// 0 0 +--------(256-pulsewidth)--------
752///~~~
753///
754/// @param in
755/// @param pulsewidth
756/// @returns square wave output
757LIB8STATIC uint8_t squarewave8( uint8_t in, uint8_t pulsewidth)
758{
759 if( in < pulsewidth || (pulsewidth == 255)) {
760 return 255;
761 } else {
762 return 0;
763 }
764}
765
766
767// Beat generators - These functions produce waves at a given
768// number of 'beats per minute'. Internally, they use
769// the Arduino function 'millis' to track elapsed time.
770// Accuracy is a bit better than one part in a thousand.
771//
772// beat8( BPM ) returns an 8-bit value that cycles 'BPM' times
773// per minute, rising from 0 to 255, resetting to zero,
774// rising up again, etc.. The output of this function
775// is suitable for feeding directly into sin8, and cos8,
776// triwave8, quadwave8, and cubicwave8.
777// beat16( BPM ) returns a 16-bit value that cycles 'BPM' times
778// per minute, rising from 0 to 65535, resetting to zero,
779// rising up again, etc. The output of this function is
780// suitable for feeding directly into sin16 and cos16.
781// beat88( BPM88) is the same as beat16, except that the BPM88 argument
782// MUST be in Q8.8 fixed point format, e.g. 120BPM must
783// be specified as 120*256 = 30720.
784// beatsin8( BPM, uint8_t low, uint8_t high) returns an 8-bit value that
785// rises and falls in a sine wave, 'BPM' times per minute,
786// between the values of 'low' and 'high'.
787// beatsin16( BPM, uint16_t low, uint16_t high) returns a 16-bit value
788// that rises and falls in a sine wave, 'BPM' times per
789// minute, between the values of 'low' and 'high'.
790// beatsin88( BPM88, ...) is the same as beatsin16, except that the
791// BPM88 argument MUST be in Q8.8 fixed point format,
792// e.g. 120BPM must be specified as 120*256 = 30720.
793//
794// BPM can be supplied two ways. The simpler way of specifying BPM is as
795// a simple 8-bit integer from 1-255, (e.g., "120").
796// The more sophisticated way of specifying BPM allows for fractional
797// "Q8.8" fixed point number (an 'accum88') with an 8-bit integer part and
798// an 8-bit fractional part. The easiest way to construct this is to multiply
799// a floating point BPM value (e.g. 120.3) by 256, (e.g. resulting in 30796
800// in this case), and pass that as the 16-bit BPM argument.
801// "BPM88" MUST always be specified in Q8.8 format.
802//
803// Originally designed to make an entire animation project pulse with brightness.
804// For that effect, add this line just above your existing call to "FastLED.show()":
805//
806// uint8_t bright = beatsin8( 60 /*BPM*/, 192 /*dimmest*/, 255 /*brightest*/ );
807// FastLED.setBrightness( bright );
808// FastLED.show();
809//
810// The entire animation will now pulse between brightness 192 and 255 once per second.
811
812
// Millisecond time source used by the beat generators and the
// seconds16/minutes16/hours8 helpers.
// GET_MILLIS expands to the name of a function returning the current
// time in milliseconds:
//   - 'millis' when ARDUINO, SPARK or FASTLED_HAS_MILLIS is defined and
//     USE_GET_MILLISECOND_TIMER is not (no declaration of millis is
//     emitted here);
//   - 'get_millisecond_timer' otherwise; the application must then provide
//         uint32_t get_millisecond_timer(void);
// #define USE_GET_MILLISECOND_TIMER to force the second choice.
#if !defined(USE_GET_MILLISECOND_TIMER) && \
    (defined(ARDUINO) || defined(SPARK) || defined(FASTLED_HAS_MILLIS))
#define GET_MILLIS millis
#else
uint32_t get_millisecond_timer(void);
#define GET_MILLIS get_millisecond_timer
#endif
828
829// beat16 generates a 16-bit 'sawtooth' wave at a given BPM,
830/// with BPM specified in Q8.8 fixed-point format; e.g.
831/// for this function, 120 BPM MUST BE specified as
832/// 120*256 = 30720.
833/// If you just want to specify "120", use beat16 or beat8.
834LIB8STATIC uint16_t beat88( accum88 beats_per_minute_88, uint32_t timebase)
835{
836 // BPM is 'beats per minute', or 'beats per 60000ms'.
837 // To avoid using the (slower) division operator, we
838 // want to convert 'beats per 60000ms' to 'beats per 65536ms',
839 // and then use a simple, fast bit-shift to divide by 65536.
840 //
841 // The ratio 65536:60000 is 279.620266667:256; we'll call it 280:256.
842 // The conversion is accurate to about 0.05%, more or less,
843 // e.g. if you ask for "120 BPM", you'll get about "119.93".
844 return (((GET_MILLIS()) - timebase) * beats_per_minute_88 * 280) >> 16;
845}
846
847/// beat16 generates a 16-bit 'sawtooth' wave at a given BPM
848LIB8STATIC uint16_t beat16( accum88 beats_per_minute, uint32_t timebase)
849{
850 // Convert simple 8-bit BPM's to full Q8.8 accum88's if needed
851 if( beats_per_minute < 256) beats_per_minute <<= 8;
852 return beat88(beats_per_minute, timebase);
853}
854
855/// beat8 generates an 8-bit 'sawtooth' wave at a given BPM
856LIB8STATIC uint8_t beat8( accum88 beats_per_minute, uint32_t timebase)
857{
858 return beat16( beats_per_minute, timebase) >> 8;
859}
860
861/// beatsin88 generates a 16-bit sine wave at a given BPM,
862/// that oscillates within a given range.
863/// For this function, BPM MUST BE SPECIFIED as
864/// a Q8.8 fixed-point value; e.g. 120BPM must be
865/// specified as 120*256 = 30720.
866/// If you just want to specify "120", use beatsin16 or beatsin8.
867LIB8STATIC uint16_t beatsin88( accum88 beats_per_minute_88, uint16_t lowest, uint16_t highest, uint32_t timebase, uint16_t phase_offset)
868{
869 uint16_t beat = beat88( beats_per_minute_88, timebase);
870 uint16_t beatsin = (sin16( beat + phase_offset) + 32768);
871 uint16_t rangewidth = highest - lowest;
872 uint16_t scaledbeat = scale16( beatsin, rangewidth);
873 uint16_t result = lowest + scaledbeat;
874 return result;
875}
876
877/// beatsin16 generates a 16-bit sine wave at a given BPM,
878/// that oscillates within a given range.
879LIB8STATIC uint16_t beatsin16(accum88 beats_per_minute, uint16_t lowest, uint16_t highest, uint32_t timebase, uint16_t phase_offset)
880{
881 uint16_t beat = beat16( beats_per_minute, timebase);
882 uint16_t beatsin = (sin16( beat + phase_offset) + 32768);
883 uint16_t rangewidth = highest - lowest;
884 uint16_t scaledbeat = scale16( beatsin, rangewidth);
885 uint16_t result = lowest + scaledbeat;
886 return result;
887}
888
889/// beatsin8 generates an 8-bit sine wave at a given BPM,
890/// that oscillates within a given range.
891LIB8STATIC uint8_t beatsin8( accum88 beats_per_minute, uint8_t lowest, uint8_t highest, uint32_t timebase, uint8_t phase_offset)
892{
893 uint8_t beat = beat8( beats_per_minute, timebase);
894 uint8_t beatsin = sin8( beat + phase_offset);
895 uint8_t rangewidth = highest - lowest;
896 uint8_t scaledbeat = scale8( beatsin, rangewidth);
897 uint8_t result = lowest + scaledbeat;
898 return result;
899}
900
901
902/// Return the current seconds since boot in a 16-bit value. Used as part of the
903/// "every N time-periods" mechanism
904LIB8STATIC uint16_t seconds16(void)
905{
906 uint32_t ms = GET_MILLIS();
907 uint16_t s16;
908 s16 = ms / 1000;
909 return s16;
910}
911
912/// Return the current minutes since boot in a 16-bit value. Used as part of the
913/// "every N time-periods" mechanism
914LIB8STATIC uint16_t minutes16(void)
915{
916 uint32_t ms = GET_MILLIS();
917 uint16_t m16;
918 m16 = (ms / (60000L)) & 0xFFFF;
919 return m16;
920}
921
922/// Return the current hours since boot in an 8-bit value. Used as part of the
923/// "every N time-periods" mechanism
924LIB8STATIC uint8_t hours8(void)
925{
926 uint32_t ms = GET_MILLIS();
927 uint8_t h8;
928 h8 = (ms / (3600000L)) & 0xFF;
929 return h8;
930}
931
932///@}
933
934#endif
diff --git a/lib/lib8tion/math8.h b/lib/lib8tion/math8.h
new file mode 100644
index 000000000..8c6b6c227
--- /dev/null
+++ b/lib/lib8tion/math8.h
@@ -0,0 +1,552 @@
1#ifndef __INC_LIB8TION_MATH_H
2#define __INC_LIB8TION_MATH_H
3
4#include "scale8.h"
5
6///@ingroup lib8tion
7
8///@defgroup Math Basic math operations
9/// Fast, efficient 8-bit math functions specifically
10/// designed for high-performance LED programming.
11///
12/// Because of the AVR(Arduino) and ARM assembly language
13/// implementations provided, using these functions often
14/// results in smaller and faster code than the equivalent
15/// program using plain "C" arithmetic and logic.
16///@{
17
18
19/// add one byte to another, saturating at 0xFF
20/// @param i - first byte to add
21/// @param j - second byte to add
22/// @returns the sum of i & j, capped at 0xFF
23LIB8STATIC_ALWAYS_INLINE uint8_t qadd8( uint8_t i, uint8_t j)
24{
25#if QADD8_C == 1
26 uint16_t t = i + j;
27 if (t > 255) t = 255;
28 return t;
29#elif QADD8_AVRASM == 1
30 asm volatile(
31 /* First, add j to i, conditioning the C flag */
32 "add %0, %1 \n\t"
33
34 /* Now test the C flag.
35 If C is clear, we branch around a load of 0xFF into i.
36 If C is set, we go ahead and load 0xFF into i.
37 */
38 "brcc L_%= \n\t"
39 "ldi %0, 0xFF \n\t"
40 "L_%=: "
41 : "+a" (i)
42 : "a" (j) );
43 return i;
44#elif QADD8_ARM_DSP_ASM == 1
45 asm volatile( "uqadd8 %0, %0, %1" : "+r" (i) : "r" (j));
46 return i;
47#else
48#error "No implementation for qadd8 available."
49#endif
50}
51
52/// Add one byte to another, saturating at 0x7F
53/// @param i - first byte to add
54/// @param j - second byte to add
55/// @returns the sum of i & j, capped at 0xFF
56LIB8STATIC_ALWAYS_INLINE int8_t qadd7( int8_t i, int8_t j)
57{
58#if QADD7_C == 1
59 int16_t t = i + j;
60 if (t > 127) t = 127;
61 return t;
62#elif QADD7_AVRASM == 1
63 asm volatile(
64 /* First, add j to i, conditioning the V flag */
65 "add %0, %1 \n\t"
66
67 /* Now test the V flag.
68 If V is clear, we branch around a load of 0x7F into i.
69 If V is set, we go ahead and load 0x7F into i.
70 */
71 "brvc L_%= \n\t"
72 "ldi %0, 0x7F \n\t"
73 "L_%=: "
74 : "+a" (i)
75 : "a" (j) );
76
77 return i;
78#elif QADD7_ARM_DSP_ASM == 1
79 asm volatile( "qadd8 %0, %0, %1" : "+r" (i) : "r" (j));
80 return i;
81#else
82#error "No implementation for qadd7 available."
83#endif
84}
85
86/// subtract one byte from another, saturating at 0x00
87/// @returns i - j with a floor of 0
88LIB8STATIC_ALWAYS_INLINE uint8_t qsub8( uint8_t i, uint8_t j)
89{
90#if QSUB8_C == 1
91 int16_t t = i - j;
92 if (t < 0) t = 0;
93 return t;
94#elif QSUB8_AVRASM == 1
95
96 asm volatile(
97 /* First, subtract j from i, conditioning the C flag */
98 "sub %0, %1 \n\t"
99
100 /* Now test the C flag.
101 If C is clear, we branch around a load of 0x00 into i.
102 If C is set, we go ahead and load 0x00 into i.
103 */
104 "brcc L_%= \n\t"
105 "ldi %0, 0x00 \n\t"
106 "L_%=: "
107 : "+a" (i)
108 : "a" (j) );
109
110 return i;
111#else
112#error "No implementation for qsub8 available."
113#endif
114}
115
116/// add one byte to another, with one byte result
117LIB8STATIC_ALWAYS_INLINE uint8_t add8( uint8_t i, uint8_t j)
118{
119#if ADD8_C == 1
120 uint16_t t = i + j;
121 return t;
122#elif ADD8_AVRASM == 1
123 // Add j to i, period.
124 asm volatile( "add %0, %1" : "+a" (i) : "a" (j));
125 return i;
126#else
127#error "No implementation for add8 available."
128#endif
129}
130
131/// add one byte to another, with one byte result
132LIB8STATIC_ALWAYS_INLINE uint16_t add8to16( uint8_t i, uint16_t j)
133{
134#if ADD8_C == 1
135 uint16_t t = i + j;
136 return t;
137#elif ADD8_AVRASM == 1
138 // Add i(one byte) to j(two bytes)
139 asm volatile( "add %A[j], %[i] \n\t"
140 "adc %B[j], __zero_reg__ \n\t"
141 : [j] "+a" (j)
142 : [i] "a" (i)
143 );
144 return i;
145#else
146#error "No implementation for add8to16 available."
147#endif
148}
149
150
151/// subtract one byte from another, 8-bit result
152LIB8STATIC_ALWAYS_INLINE uint8_t sub8( uint8_t i, uint8_t j)
153{
154#if SUB8_C == 1
155 int16_t t = i - j;
156 return t;
157#elif SUB8_AVRASM == 1
158 // Subtract j from i, period.
159 asm volatile( "sub %0, %1" : "+a" (i) : "a" (j));
160 return i;
161#else
162#error "No implementation for sub8 available."
163#endif
164}
165
166/// Calculate an integer average of two unsigned
167/// 8-bit integer values (uint8_t).
168/// Fractional results are rounded down, e.g. avg8(20,41) = 30
169LIB8STATIC_ALWAYS_INLINE uint8_t avg8( uint8_t i, uint8_t j)
170{
171#if AVG8_C == 1
172 return (i + j) >> 1;
173#elif AVG8_AVRASM == 1
174 asm volatile(
175 /* First, add j to i, 9th bit overflows into C flag */
176 "add %0, %1 \n\t"
177 /* Divide by two, moving C flag into high 8th bit */
178 "ror %0 \n\t"
179 : "+a" (i)
180 : "a" (j) );
181 return i;
182#else
183#error "No implementation for avg8 available."
184#endif
185}
186
187/// Calculate an integer average of two unsigned
188/// 16-bit integer values (uint16_t).
189/// Fractional results are rounded down, e.g. avg16(20,41) = 30
190LIB8STATIC_ALWAYS_INLINE uint16_t avg16( uint16_t i, uint16_t j)
191{
192#if AVG16_C == 1
193 return (uint32_t)((uint32_t)(i) + (uint32_t)(j)) >> 1;
194#elif AVG16_AVRASM == 1
195 asm volatile(
196 /* First, add jLo (heh) to iLo, 9th bit overflows into C flag */
197 "add %A[i], %A[j] \n\t"
198 /* Now, add C + jHi to iHi, 17th bit overflows into C flag */
199 "adc %B[i], %B[j] \n\t"
200 /* Divide iHi by two, moving C flag into high 16th bit, old 9th bit now in C */
201 "ror %B[i] \n\t"
202 /* Divide iLo by two, moving C flag into high 8th bit */
203 "ror %A[i] \n\t"
204 : [i] "+a" (i)
205 : [j] "a" (j) );
206 return i;
207#else
208#error "No implementation for avg16 available."
209#endif
210}
211
212
213/// Calculate an integer average of two signed 7-bit
214/// integers (int8_t)
215/// If the first argument is even, result is rounded down.
216/// If the first argument is odd, result is result up.
217LIB8STATIC_ALWAYS_INLINE int8_t avg7( int8_t i, int8_t j)
218{
219#if AVG7_C == 1
220 return ((i + j) >> 1) + (i & 0x1);
221#elif AVG7_AVRASM == 1
222 asm volatile(
223 "asr %1 \n\t"
224 "asr %0 \n\t"
225 "adc %0, %1 \n\t"
226 : "+a" (i)
227 : "a" (j) );
228 return i;
229#else
230#error "No implementation for avg7 available."
231#endif
232}
233
234/// Calculate an integer average of two signed 15-bit
235/// integers (int16_t)
236/// If the first argument is even, result is rounded down.
237/// If the first argument is odd, result is result up.
238LIB8STATIC_ALWAYS_INLINE int16_t avg15( int16_t i, int16_t j)
239{
240#if AVG15_C == 1
241 return ((int32_t)((int32_t)(i) + (int32_t)(j)) >> 1) + (i & 0x1);
242#elif AVG15_AVRASM == 1
243 asm volatile(
244 /* first divide j by 2, throwing away lowest bit */
245 "asr %B[j] \n\t"
246 "ror %A[j] \n\t"
247 /* now divide i by 2, with lowest bit going into C */
248 "asr %B[i] \n\t"
249 "ror %A[i] \n\t"
250 /* add j + C to i */
251 "adc %A[i], %A[j] \n\t"
252 "adc %B[i], %B[j] \n\t"
253 : [i] "+a" (i)
254 : [j] "a" (j) );
255 return i;
256#else
257#error "No implementation for avg15 available."
258#endif
259}
260
261
262/// Calculate the remainder of one unsigned 8-bit
263/// value divided by anoter, aka A % M.
264/// Implemented by repeated subtraction, which is
265/// very compact, and very fast if A is 'probably'
266/// less than M. If A is a large multiple of M,
267/// the loop has to execute multiple times. However,
268/// even in that case, the loop is only two
269/// instructions long on AVR, i.e., quick.
270LIB8STATIC_ALWAYS_INLINE uint8_t mod8( uint8_t a, uint8_t m)
271{
272#if defined(__AVR__)
273 asm volatile (
274 "L_%=: sub %[a],%[m] \n\t"
275 " brcc L_%= \n\t"
276 " add %[a],%[m] \n\t"
277 : [a] "+r" (a)
278 : [m] "r" (m)
279 );
280#else
281 while( a >= m) a -= m;
282#endif
283 return a;
284}
285
286/// Add two numbers, and calculate the modulo
287/// of the sum and a third number, M.
288/// In other words, it returns (A+B) % M.
289/// It is designed as a compact mechanism for
290/// incrementing a 'mode' switch and wrapping
291/// around back to 'mode 0' when the switch
292/// goes past the end of the available range.
293/// e.g. if you have seven modes, this switches
294/// to the next one and wraps around if needed:
295/// mode = addmod8( mode, 1, 7);
296///LIB8STATIC_ALWAYS_INLINESee 'mod8' for notes on performance.
297LIB8STATIC uint8_t addmod8( uint8_t a, uint8_t b, uint8_t m)
298{
299#if defined(__AVR__)
300 asm volatile (
301 " add %[a],%[b] \n\t"
302 "L_%=: sub %[a],%[m] \n\t"
303 " brcc L_%= \n\t"
304 " add %[a],%[m] \n\t"
305 : [a] "+r" (a)
306 : [b] "r" (b), [m] "r" (m)
307 );
308#else
309 a += b;
310 while( a >= m) a -= m;
311#endif
312 return a;
313}
314
315/// Subtract two numbers, and calculate the modulo
316/// of the difference and a third number, M.
317/// In other words, it returns (A-B) % M.
318/// It is designed as a compact mechanism for
319/// incrementing a 'mode' switch and wrapping
320/// around back to 'mode 0' when the switch
321/// goes past the end of the available range.
322/// e.g. if you have seven modes, this switches
323/// to the next one and wraps around if needed:
324/// mode = addmod8( mode, 1, 7);
325///LIB8STATIC_ALWAYS_INLINESee 'mod8' for notes on performance.
326LIB8STATIC uint8_t submod8( uint8_t a, uint8_t b, uint8_t m)
327{
328#if defined(__AVR__)
329 asm volatile (
330 " sub %[a],%[b] \n\t"
331 "L_%=: sub %[a],%[m] \n\t"
332 " brcc L_%= \n\t"
333 " add %[a],%[m] \n\t"
334 : [a] "+r" (a)
335 : [b] "r" (b), [m] "r" (m)
336 );
337#else
338 a -= b;
339 while( a >= m) a -= m;
340#endif
341 return a;
342}
343
344/// 8x8 bit multiplication, with 8 bit result
345LIB8STATIC_ALWAYS_INLINE uint8_t mul8( uint8_t i, uint8_t j)
346{
347#if MUL8_C == 1
348 return ((uint16_t)i * (uint16_t)(j) ) & 0xFF;
349#elif MUL8_AVRASM == 1
350 asm volatile(
351 /* Multiply 8-bit i * 8-bit j, giving 16-bit r1,r0 */
352 "mul %0, %1 \n\t"
353 /* Extract the LOW 8-bits (r0) */
354 "mov %0, r0 \n\t"
355 /* Restore r1 to "0"; it's expected to always be that */
356 "clr __zero_reg__ \n\t"
357 : "+a" (i)
358 : "a" (j)
359 : "r0", "r1");
360
361 return i;
362#else
363#error "No implementation for mul8 available."
364#endif
365}
366
367
368/// saturating 8x8 bit multiplication, with 8 bit result
369/// @returns the product of i * j, capping at 0xFF
370LIB8STATIC_ALWAYS_INLINE uint8_t qmul8( uint8_t i, uint8_t j)
371{
372#if QMUL8_C == 1
373 int p = ((uint16_t)i * (uint16_t)(j) );
374 if( p > 255) p = 255;
375 return p;
376#elif QMUL8_AVRASM == 1
377 asm volatile(
378 /* Multiply 8-bit i * 8-bit j, giving 16-bit r1,r0 */
379 " mul %0, %1 \n\t"
380 /* If high byte of result is zero, all is well. */
381 " tst r1 \n\t"
382 " breq Lnospill_%= \n\t"
383 /* If high byte of result > 0, saturate low byte to 0xFF */
384 " ldi %0,0xFF \n\t"
385 " rjmp Ldone_%= \n\t"
386 "Lnospill_%=: \n\t"
387 /* Extract the LOW 8-bits (r0) */
388 " mov %0, r0 \n\t"
389 "Ldone_%=: \n\t"
390 /* Restore r1 to "0"; it's expected to always be that */
391 " clr __zero_reg__ \n\t"
392 : "+a" (i)
393 : "a" (j)
394 : "r0", "r1");
395
396 return i;
397#else
398#error "No implementation for qmul8 available."
399#endif
400}
401
402
403/// take abs() of a signed 8-bit uint8_t
404LIB8STATIC_ALWAYS_INLINE int8_t abs8( int8_t i)
405{
406#if ABS8_C == 1
407 if( i < 0) i = -i;
408 return i;
409#elif ABS8_AVRASM == 1
410
411
412 asm volatile(
413 /* First, check the high bit, and prepare to skip if it's clear */
414 "sbrc %0, 7 \n"
415
416 /* Negate the value */
417 "neg %0 \n"
418
419 : "+r" (i) : "r" (i) );
420 return i;
421#else
422#error "No implementation for abs8 available."
423#endif
424}
425
426/// square root for 16-bit integers
427/// About three times faster and five times smaller
428/// than Arduino's general sqrt on AVR.
429LIB8STATIC uint8_t sqrt16(uint16_t x)
430{
431 if( x <= 1) {
432 return x;
433 }
434
435 uint8_t low = 1; // lower bound
436 uint8_t hi, mid;
437
438 if( x > 7904) {
439 hi = 255;
440 } else {
441 hi = (x >> 5) + 8; // initial estimate for upper bound
442 }
443
444 do {
445 mid = (low + hi) >> 1;
446 if ((uint16_t)(mid * mid) > x) {
447 hi = mid - 1;
448 } else {
449 if( mid == 255) {
450 return 255;
451 }
452 low = mid + 1;
453 }
454 } while (hi >= low);
455
456 return low - 1;
457}
458
459/// blend a variable proproportion(0-255) of one byte to another
460/// @param a - the starting byte value
461/// @param b - the byte value to blend toward
462/// @param amountOfB - the proportion (0-255) of b to blend
463/// @returns a byte value between a and b, inclusive
464#if (FASTLED_BLEND_FIXED == 1)
465LIB8STATIC uint8_t blend8( uint8_t a, uint8_t b, uint8_t amountOfB)
466{
467#if BLEND8_C == 1
468 uint16_t partial;
469 uint8_t result;
470
471 uint8_t amountOfA = 255 - amountOfB;
472
473 partial = (a * amountOfA);
474#if (FASTLED_SCALE8_FIXED == 1)
475 partial += a;
476 //partial = add8to16( a, partial);
477#endif
478
479 partial += (b * amountOfB);
480#if (FASTLED_SCALE8_FIXED == 1)
481 partial += b;
482 //partial = add8to16( b, partial);
483#endif
484
485 result = partial >> 8;
486
487 return result;
488
489#elif BLEND8_AVRASM == 1
490 uint16_t partial;
491 uint8_t result;
492
493 asm volatile (
494 /* partial = b * amountOfB */
495 " mul %[b], %[amountOfB] \n\t"
496 " movw %A[partial], r0 \n\t"
497
498 /* amountOfB (aka amountOfA) = 255 - amountOfB */
499 " com %[amountOfB] \n\t"
500
501 /* partial += a * amountOfB (aka amountOfA) */
502 " mul %[a], %[amountOfB] \n\t"
503
504 " add %A[partial], r0 \n\t"
505 " adc %B[partial], r1 \n\t"
506
507 " clr __zero_reg__ \n\t"
508
509#if (FASTLED_SCALE8_FIXED == 1)
510 /* partial += a */
511 " add %A[partial], %[a] \n\t"
512 " adc %B[partial], __zero_reg__ \n\t"
513
514 // partial += b
515 " add %A[partial], %[b] \n\t"
516 " adc %B[partial], __zero_reg__ \n\t"
517#endif
518
519 : [partial] "=r" (partial),
520 [amountOfB] "+a" (amountOfB)
521 : [a] "a" (a),
522 [b] "a" (b)
523 : "r0", "r1"
524 );
525
526 result = partial >> 8;
527
528 return result;
529
530#else
531#error "No implementation for blend8 available."
532#endif
533}
534
535#else
536LIB8STATIC uint8_t blend8( uint8_t a, uint8_t b, uint8_t amountOfB)
537{
538 // This version loses precision in the integer math
539 // and can actually return results outside of the range
540 // from a to b. Its use is not recommended.
541 uint8_t result;
542 uint8_t amountOfA = 255 - amountOfB;
543 result = scale8_LEAVING_R1_DIRTY( a, amountOfA)
544 + scale8_LEAVING_R1_DIRTY( b, amountOfB);
545 cleanup_R1();
546 return result;
547}
548#endif
549
550
551///@}
552#endif
diff --git a/lib/lib8tion/random8.h b/lib/lib8tion/random8.h
new file mode 100644
index 000000000..7ee67cbb3
--- /dev/null
+++ b/lib/lib8tion/random8.h
@@ -0,0 +1,94 @@
1#ifndef __INC_LIB8TION_RANDOM_H
2#define __INC_LIB8TION_RANDOM_H
3///@ingroup lib8tion
4
5///@defgroup Random Fast random number generators
6/// Fast 8- and 16- bit unsigned random numbers.
7/// Significantly faster than Arduino random(), but
8/// also somewhat less random. You can add entropy.
9///@{
10
// Generator recurrence, evaluated in 16 bits:
//   X(n+1) = (2053 * X(n)) + 13849
#define FASTLED_RAND16_2053  ((uint16_t)2053)
#define FASTLED_RAND16_13849 ((uint16_t)13849)

/// current state (seed) of the random number generator;
/// only declared here, the definition lives in another file
extern uint16_t rand16seed;
17
18/// Generate an 8-bit random number
19LIB8STATIC uint8_t random8(void)
20{
21 rand16seed = (rand16seed * FASTLED_RAND16_2053) + FASTLED_RAND16_13849;
22 // return the sum of the high and low bytes, for better
23 // mixing and non-sequential correlation
24 return (uint8_t)(((uint8_t)(rand16seed & 0xFF)) +
25 ((uint8_t)(rand16seed >> 8)));
26}
27
28/// Generate a 16 bit random number
29LIB8STATIC uint16_t random16(void)
30{
31 rand16seed = (rand16seed * FASTLED_RAND16_2053) + FASTLED_RAND16_13849;
32 return rand16seed;
33}
34
35/// Generate an 8-bit random number between 0 and lim
36/// @param lim the upper bound for the result
37LIB8STATIC uint8_t random8_max(uint8_t lim)
38{
39 uint8_t r = random8();
40 r = (r*lim) >> 8;
41 return r;
42}
43
44/// Generate an 8-bit random number in the given range
45/// @param min the lower bound for the random number
46/// @param lim the upper bound for the random number
47LIB8STATIC uint8_t random8_min_max(uint8_t min, uint8_t lim)
48{
49 uint8_t delta = lim - min;
50 uint8_t r = random8_max(delta) + min;
51 return r;
52}
53
54/// Generate an 16-bit random number between 0 and lim
55/// @param lim the upper bound for the result
56LIB8STATIC uint16_t random16_max(uint16_t lim)
57{
58 uint16_t r = random16();
59 uint32_t p = (uint32_t)lim * (uint32_t)r;
60 r = p >> 16;
61 return r;
62}
63
64/// Generate an 16-bit random number in the given range
65/// @param min the lower bound for the random number
66/// @param lim the upper bound for the random number
67LIB8STATIC uint16_t random16_min_max( uint16_t min, uint16_t lim)
68{
69 uint16_t delta = lim - min;
70 uint16_t r = random16_max(delta) + min;
71 return r;
72}
73
74/// Set the 16-bit seed used for the random number generator
75LIB8STATIC void random16_set_seed(uint16_t seed)
76{
77 rand16seed = seed;
78}
79
80/// Get the current seed value for the random number generator
81LIB8STATIC uint16_t random16_get_seed(void)
82{
83 return rand16seed;
84}
85
86/// Add entropy into the random number generator
87LIB8STATIC void random16_add_entropy(uint16_t entropy)
88{
89 rand16seed += entropy;
90}
91
92///@}
93
94#endif
diff --git a/lib/lib8tion/scale8.h b/lib/lib8tion/scale8.h
new file mode 100644
index 000000000..9895fd4d7
--- /dev/null
+++ b/lib/lib8tion/scale8.h
@@ -0,0 +1,542 @@
1#ifndef __INC_LIB8TION_SCALE_H
2#define __INC_LIB8TION_SCALE_H
3
4///@ingroup lib8tion
5
6///@defgroup Scaling Scaling functions
7/// Fast, efficient 8-bit scaling functions specifically
8/// designed for high-performance LED programming.
9///
10/// Because of the AVR(Arduino) and ARM assembly language
11/// implementations provided, using these functions often
12/// results in smaller and faster code than the equivalent
13/// program using plain "C" arithmetic and logic.
14///@{
15
16/// scale one byte by a second one, which is treated as
17/// the numerator of a fraction whose denominator is 256
18/// In other words, it computes i * (scale / 256)
19/// 4 clocks AVR with MUL, 2 clocks ARM
20LIB8STATIC_ALWAYS_INLINE uint8_t scale8( uint8_t i, fract8 scale)
21{
22#if SCALE8_C == 1
23#if (FASTLED_SCALE8_FIXED == 1)
24 return (((uint16_t)i) * (1+(uint16_t)(scale))) >> 8;
25#else
26 return ((uint16_t)i * (uint16_t)(scale) ) >> 8;
27#endif
28#elif SCALE8_AVRASM == 1
29#if defined(LIB8_ATTINY)
30#if (FASTLED_SCALE8_FIXED == 1)
31 uint8_t work=i;
32#else
33 uint8_t work=0;
34#endif
35 uint8_t cnt=0x80;
36 asm volatile(
37#if (FASTLED_SCALE8_FIXED == 1)
38 " inc %[scale] \n\t"
39 " breq DONE_%= \n\t"
40 " clr %[work] \n\t"
41#endif
42 "LOOP_%=: \n\t"
43 /*" sbrc %[scale], 0 \n\t"
44 " add %[work], %[i] \n\t"
45 " ror %[work] \n\t"
46 " lsr %[scale] \n\t"
47 " clc \n\t"*/
48 " sbrc %[scale], 0 \n\t"
49 " add %[work], %[i] \n\t"
50 " ror %[work] \n\t"
51 " lsr %[scale] \n\t"
52 " lsr %[cnt] \n\t"
53 "brcc LOOP_%= \n\t"
54 "DONE_%=: \n\t"
55 : [work] "+r" (work), [cnt] "+r" (cnt)
56 : [scale] "r" (scale), [i] "r" (i)
57 :
58 );
59 return work;
60#else
61 asm volatile(
62#if (FASTLED_SCALE8_FIXED==1)
63 // Multiply 8-bit i * 8-bit scale, giving 16-bit r1,r0
64 "mul %0, %1 \n\t"
65 // Add i to r0, possibly setting the carry flag
66 "add r0, %0 \n\t"
67 // load the immediate 0 into i (note, this does _not_ touch any flags)
68 "ldi %0, 0x00 \n\t"
69 // walk and chew gum at the same time
70 "adc %0, r1 \n\t"
71#else
72 /* Multiply 8-bit i * 8-bit scale, giving 16-bit r1,r0 */
73 "mul %0, %1 \n\t"
74 /* Move the high 8-bits of the product (r1) back to i */
75 "mov %0, r1 \n\t"
76 /* Restore r1 to "0"; it's expected to always be that */
77#endif
78 "clr __zero_reg__ \n\t"
79
80 : "+a" (i) /* writes to i */
81 : "a" (scale) /* uses scale */
82 : "r0", "r1" /* clobbers r0, r1 */ );
83
84 /* Return the result */
85 return i;
86#endif
87#else
88#error "No implementation for scale8 available."
89#endif
90}
91
92
93/// The "video" version of scale8 guarantees that the output will
94/// be only be zero if one or both of the inputs are zero. If both
95/// inputs are non-zero, the output is guaranteed to be non-zero.
96/// This makes for better 'video'/LED dimming, at the cost of
97/// several additional cycles.
98LIB8STATIC_ALWAYS_INLINE uint8_t scale8_video( uint8_t i, fract8 scale)
99{
100#if SCALE8_C == 1 || defined(LIB8_ATTINY)
101 uint8_t j = (((int)i * (int)scale) >> 8) + ((i&&scale)?1:0);
102 // uint8_t nonzeroscale = (scale != 0) ? 1 : 0;
103 // uint8_t j = (i == 0) ? 0 : (((int)i * (int)(scale) ) >> 8) + nonzeroscale;
104 return j;
105#elif SCALE8_AVRASM == 1
106 uint8_t j=0;
107 asm volatile(
108 " tst %[i]\n\t"
109 " breq L_%=\n\t"
110 " mul %[i], %[scale]\n\t"
111 " mov %[j], r1\n\t"
112 " clr __zero_reg__\n\t"
113 " cpse %[scale], r1\n\t"
114 " subi %[j], 0xFF\n\t"
115 "L_%=: \n\t"
116 : [j] "+a" (j)
117 : [i] "a" (i), [scale] "a" (scale)
118 : "r0", "r1");
119
120 return j;
121 // uint8_t nonzeroscale = (scale != 0) ? 1 : 0;
122 // asm volatile(
123 // " tst %0 \n"
124 // " breq L_%= \n"
125 // " mul %0, %1 \n"
126 // " mov %0, r1 \n"
127 // " add %0, %2 \n"
128 // " clr __zero_reg__ \n"
129 // "L_%=: \n"
130
131 // : "+a" (i)
132 // : "a" (scale), "a" (nonzeroscale)
133 // : "r0", "r1");
134
135 // // Return the result
136 // return i;
137#else
138#error "No implementation for scale8_video available."
139#endif
140}
141
142
143/// This version of scale8 does not clean up the R1 register on AVR
144/// If you are doing several 'scale8's in a row, use this, and
145/// then explicitly call cleanup_R1.
146LIB8STATIC_ALWAYS_INLINE uint8_t scale8_LEAVING_R1_DIRTY( uint8_t i, fract8 scale)
147{
148#if SCALE8_C == 1
149#if (FASTLED_SCALE8_FIXED == 1)
150 return (((uint16_t)i) * ((uint16_t)(scale)+1)) >> 8;
151#else
152 return ((int)i * (int)(scale) ) >> 8;
153#endif
154#elif SCALE8_AVRASM == 1
155 asm volatile(
156 #if (FASTLED_SCALE8_FIXED==1)
157 // Multiply 8-bit i * 8-bit scale, giving 16-bit r1,r0
158 "mul %0, %1 \n\t"
159 // Add i to r0, possibly setting the carry flag
160 "add r0, %0 \n\t"
161 // load the immediate 0 into i (note, this does _not_ touch any flags)
162 "ldi %0, 0x00 \n\t"
163 // walk and chew gum at the same time
164 "adc %0, r1 \n\t"
165 #else
166 /* Multiply 8-bit i * 8-bit scale, giving 16-bit r1,r0 */
167 "mul %0, %1 \n\t"
168 /* Move the high 8-bits of the product (r1) back to i */
169 "mov %0, r1 \n\t"
170 #endif
171 /* R1 IS LEFT DIRTY HERE; YOU MUST ZERO IT OUT YOURSELF */
172 /* "clr __zero_reg__ \n\t" */
173
174 : "+a" (i) /* writes to i */
175 : "a" (scale) /* uses scale */
176 : "r0", "r1" /* clobbers r0, r1 */ );
177
178 // Return the result
179 return i;
180#else
181#error "No implementation for scale8_LEAVING_R1_DIRTY available."
182#endif
183}
184
185
186/// This version of scale8_video does not clean up the R1 register on AVR
187/// If you are doing several 'scale8_video's in a row, use this, and
188/// then explicitly call cleanup_R1.
189LIB8STATIC_ALWAYS_INLINE uint8_t scale8_video_LEAVING_R1_DIRTY( uint8_t i, fract8 scale)
190{
191#if SCALE8_C == 1 || defined(LIB8_ATTINY)
192 uint8_t j = (((int)i * (int)scale) >> 8) + ((i&&scale)?1:0);
193 // uint8_t nonzeroscale = (scale != 0) ? 1 : 0;
194 // uint8_t j = (i == 0) ? 0 : (((int)i * (int)(scale) ) >> 8) + nonzeroscale;
195 return j;
196#elif SCALE8_AVRASM == 1
197 uint8_t j=0;
198 asm volatile(
199 " tst %[i]\n\t"
200 " breq L_%=\n\t"
201 " mul %[i], %[scale]\n\t"
202 " mov %[j], r1\n\t"
203 " breq L_%=\n\t"
204 " subi %[j], 0xFF\n\t"
205 "L_%=: \n\t"
206 : [j] "+a" (j)
207 : [i] "a" (i), [scale] "a" (scale)
208 : "r0", "r1");
209
210 return j;
211 // uint8_t nonzeroscale = (scale != 0) ? 1 : 0;
212 // asm volatile(
213 // " tst %0 \n"
214 // " breq L_%= \n"
215 // " mul %0, %1 \n"
216 // " mov %0, r1 \n"
217 // " add %0, %2 \n"
218 // " clr __zero_reg__ \n"
219 // "L_%=: \n"
220
221 // : "+a" (i)
222 // : "a" (scale), "a" (nonzeroscale)
223 // : "r0", "r1");
224
225 // // Return the result
226 // return i;
227#else
228#error "No implementation for scale8_video_LEAVING_R1_DIRTY available."
229#endif
230}
231
232/// Clean up the r1 register after a series of *LEAVING_R1_DIRTY calls
233LIB8STATIC_ALWAYS_INLINE void cleanup_R1(void)
234{
235#if CLEANUP_R1_AVRASM == 1
236 // Restore r1 to "0"; it's expected to always be that
237 asm volatile( "clr __zero_reg__ \n\t" : : : "r1" );
238#endif
239}
240
241
242/// scale a 16-bit unsigned value by an 8-bit value,
243/// considered as numerator of a fraction whose denominator
244/// is 256. In other words, it computes i * (scale / 256)
245
246LIB8STATIC_ALWAYS_INLINE uint16_t scale16by8( uint16_t i, fract8 scale )
247{
248#if SCALE16BY8_C == 1
249 uint16_t result;
250#if FASTLED_SCALE8_FIXED == 1
251 result = (i * (1+((uint16_t)scale))) >> 8;
252#else
253 result = (i * scale) / 256;
254#endif
255 return result;
256#elif SCALE16BY8_AVRASM == 1
257#if FASTLED_SCALE8_FIXED == 1
258 uint16_t result = 0;
259 asm volatile(
260 // result.A = HighByte( (i.A x scale) + i.A )
261 " mul %A[i], %[scale] \n\t"
262 " add r0, %A[i] \n\t"
263 // " adc r1, [zero] \n\t"
264 // " mov %A[result], r1 \n\t"
265 " adc %A[result], r1 \n\t"
266
267 // result.A-B += i.B x scale
268 " mul %B[i], %[scale] \n\t"
269 " add %A[result], r0 \n\t"
270 " adc %B[result], r1 \n\t"
271
272 // cleanup r1
273 " clr __zero_reg__ \n\t"
274
275 // result.A-B += i.B
276 " add %A[result], %B[i] \n\t"
277 " adc %B[result], __zero_reg__ \n\t"
278
279 : [result] "+r" (result)
280 : [i] "r" (i), [scale] "r" (scale)
281 : "r0", "r1"
282 );
283 return result;
284#else
285 uint16_t result = 0;
286 asm volatile(
287 // result.A = HighByte(i.A x j )
288 " mul %A[i], %[scale] \n\t"
289 " mov %A[result], r1 \n\t"
290 //" clr %B[result] \n\t"
291
292 // result.A-B += i.B x j
293 " mul %B[i], %[scale] \n\t"
294 " add %A[result], r0 \n\t"
295 " adc %B[result], r1 \n\t"
296
297 // cleanup r1
298 " clr __zero_reg__ \n\t"
299
300 : [result] "+r" (result)
301 : [i] "r" (i), [scale] "r" (scale)
302 : "r0", "r1"
303 );
304 return result;
305#endif
306#else
307 #error "No implementation for scale16by8 available."
308#endif
309}
310
311/// scale a 16-bit unsigned value by a 16-bit value,
312/// considered as numerator of a fraction whose denominator
313/// is 65536. In other words, it computes i * (scale / 65536)
///
/// When FASTLED_SCALE8_FIXED == 1 the computation is
/// (i * (scale + 1)) / 65536 instead, so that a scale of 65535
/// returns i unchanged.
///
/// @param i the value to scale
/// @param scale the scale factor, numerator of a fraction over 65536
/// @returns the upper 16 bits of the 32-bit product
314
315LIB8STATIC uint16_t scale16( uint16_t i, fract16 scale )
316{
317 #if SCALE16_C == 1
 // Portable C variant: widen to 32 bits, multiply, keep the top half.
318 uint16_t result;
319#if FASTLED_SCALE8_FIXED == 1
320 result = ((uint32_t)(i) * (1+(uint32_t)(scale))) / 65536;
321#else
322 result = ((uint32_t)(i) * (uint32_t)(scale)) / 65536;
323#endif
324 return result;
325#elif SCALE16_AVRASM == 1
326#if FASTLED_SCALE8_FIXED == 1
327 // implemented sort of like
328 // result = ((i * scale) + i ) / 65536
329 //
330 // why not like this, you may ask?
331 // result = (i * (scale+1)) / 65536
332 // the answer is that if scale is 65535, then scale+1
333 // will be zero, which is not what we want.
 //
 // The 16x16 -> 32 bit product is assembled from four 8x8 'mul'
 // partial products spread over three asm statements; a fourth
 // statement then adds i to the 32-bit total.
334 uint32_t result;
335 asm volatile(
336 // result.A-B = i.A x scale.A
337 " mul %A[i], %A[scale] \n\t"
338 // save results...
339 // basic idea:
340 //" mov %A[result], r0 \n\t"
341 //" mov %B[result], r1 \n\t"
342 // which can be written as...
343 " movw %A[result], r0 \n\t"
344 // Because we're going to add i.A-B to
345 // result.A-D, we DO need to keep both
346 // the r0 and r1 portions of the product
347 // UNlike in the 'unfixed scale8' version.
348 // So the movw here is needed.
349 : [result] "=r" (result)
350 : [i] "r" (i),
351 [scale] "r" (scale)
352 : "r0", "r1"
353 );
 // NOTE(review): this statement and the next one end with the high
 // product byte still in r1; r1 is only cleared at the end of the
 // third asm statement. Confirm the compiler cannot emit code between
 // these statements that relies on r1 (__zero_reg__) being zero.
354
355 asm volatile(
356 // result.C-D = i.B x scale.B
357 " mul %B[i], %B[scale] \n\t"
358 //" mov %C[result], r0 \n\t"
359 //" mov %D[result], r1 \n\t"
360 " movw %C[result], r0 \n\t"
361 : [result] "+r" (result)
362 : [i] "r" (i),
363 [scale] "r" (scale)
364 : "r0", "r1"
365 );
366
 // 'zero' is passed in as a register operand so the carry out of each
 // cross-product addition can be propagated into result.D with adc.
367 const uint8_t zero = 0;
368 asm volatile(
369 // result.B-D += i.B x scale.A
370 " mul %B[i], %A[scale] \n\t"
371
372 " add %B[result], r0 \n\t"
373 " adc %C[result], r1 \n\t"
374 " adc %D[result], %[zero] \n\t"
375
376 // result.B-D += i.A x scale.B
377 " mul %A[i], %B[scale] \n\t"
378
379 " add %B[result], r0 \n\t"
380 " adc %C[result], r1 \n\t"
381 " adc %D[result], %[zero] \n\t"
382
383 // cleanup r1
384 " clr r1 \n\t"
385
386 : [result] "+r" (result)
387 : [i] "r" (i),
388 [scale] "r" (scale),
389 [zero] "r" (zero)
390 : "r0", "r1"
391 );
392
 // The '+ i' term of ((i * scale) + i): a 32-bit add of the 16-bit i,
 // with the carry rippled through the two upper bytes.
393 asm volatile(
394 // result.A-D += i.A-B
395 " add %A[result], %A[i] \n\t"
396 " adc %B[result], %B[i] \n\t"
397 " adc %C[result], %[zero] \n\t"
398 " adc %D[result], %[zero] \n\t"
399 : [result] "+r" (result)
400 : [i] "r" (i),
401 [zero] "r" (zero)
402 );
403
 // Dividing by 65536: keep only the upper 16 bits of the total.
404 result = result >> 16;
405 return result;
406#else
 // Unfixed variant: plain (i * scale) / 65536, built from the same four
 // 8x8 partial products but without the final '+ i' step.
407 uint32_t result;
408 asm volatile(
409 // result.A-B = i.A x scale.A
410 " mul %A[i], %A[scale] \n\t"
411 // save results...
412 // basic idea:
413 //" mov %A[result], r0 \n\t"
414 //" mov %B[result], r1 \n\t"
415 // which can be written as...
416 " movw %A[result], r0 \n\t"
417 // We actually don't need to do anything with r0,
418 // as result.A is never used again here, so we
419 // could just move the high byte, but movw is
420 // one clock cycle, just like mov, so might as
421 // well, in case we want to use this code for
422 // a generic 16x16 multiply somewhere.
423
424 : [result] "=r" (result)
425 : [i] "r" (i),
426 [scale] "r" (scale)
427 : "r0", "r1"
428 );
 // NOTE(review): as in the fixed variant, r1 is left holding product
 // bits until the 'clr r1' at the end of the third asm statement;
 // confirm nothing emitted in between depends on r1 being zero.
429
430 asm volatile(
431 // result.C-D = i.B x scale.B
432 " mul %B[i], %B[scale] \n\t"
433 //" mov %C[result], r0 \n\t"
434 //" mov %D[result], r1 \n\t"
435 " movw %C[result], r0 \n\t"
436 : [result] "+r" (result)
437 : [i] "r" (i),
438 [scale] "r" (scale)
439 : "r0", "r1"
440 );
441
 // Register operand holding 0, used to propagate carries with adc.
442 const uint8_t zero = 0;
443 asm volatile(
444 // result.B-D += i.B x scale.A
445 " mul %B[i], %A[scale] \n\t"
446
447 " add %B[result], r0 \n\t"
448 " adc %C[result], r1 \n\t"
449 " adc %D[result], %[zero] \n\t"
450
451 // result.B-D += i.A x scale.B
452 " mul %A[i], %B[scale] \n\t"
453
454 " add %B[result], r0 \n\t"
455 " adc %C[result], r1 \n\t"
456 " adc %D[result], %[zero] \n\t"
457
458 // cleanup r1
459 " clr r1 \n\t"
460
461 : [result] "+r" (result)
462 : [i] "r" (i),
463 [scale] "r" (scale),
464 [zero] "r" (zero)
465 : "r0", "r1"
466 );
467
 // Dividing by 65536: keep only the upper 16 bits of the product.
468 result = result >> 16;
469 return result;
470#endif
471#else
472 #error "No implementation for scale16 available."
473#endif
474}
475///@}
476
477///@defgroup Dimming Dimming and brightening functions
478///
479/// Dimming and brightening functions
480///
481/// The eye does not respond in a linear way to light.
482/// High speed PWM'd LEDs at 50% duty cycle appear far
483/// brighter than the 'half as bright' you might expect.
484///
485/// If you want your midpoint brightness level (128) to
486/// appear half as bright as 'full' brightness (255), you
487/// have to apply a 'dimming function'.
488///@{
489
490/// Adjust a scaling value for dimming
491LIB8STATIC uint8_t dim8_raw( uint8_t x)
492{
493 return scale8( x, x);
494}
495
496/// Adjust a scaling value for dimming for video (value will never go below 1)
497LIB8STATIC uint8_t dim8_video( uint8_t x)
498{
499 return scale8_video( x, x);
500}
501
502/// Linear version of the dimming function that halves for values < 128
503LIB8STATIC uint8_t dim8_lin( uint8_t x )
504{
505 if( x & 0x80 ) {
506 x = scale8( x, x);
507 } else {
508 x += 1;
509 x /= 2;
510 }
511 return x;
512}
513
514/// inverse of the dimming function, brighten a value
515LIB8STATIC uint8_t brighten8_raw( uint8_t x)
516{
517 uint8_t ix = 255 - x;
518 return 255 - scale8( ix, ix);
519}
520
521/// inverse of the dimming function, brighten a value
522LIB8STATIC uint8_t brighten8_video( uint8_t x)
523{
524 uint8_t ix = 255 - x;
525 return 255 - scale8_video( ix, ix);
526}
527
528/// inverse of the dimming function, brighten a value
529LIB8STATIC uint8_t brighten8_lin( uint8_t x )
530{
531 uint8_t ix = 255 - x;
532 if( ix & 0x80 ) {
533 ix = scale8( ix, ix);
534 } else {
535 ix += 1;
536 ix /= 2;
537 }
538 return 255 - ix;
539}
540
541///@}
542#endif
diff --git a/lib/lib8tion/trig8.h b/lib/lib8tion/trig8.h
new file mode 100644
index 000000000..4907c6ff3
--- /dev/null
+++ b/lib/lib8tion/trig8.h
@@ -0,0 +1,259 @@
1#ifndef __INC_LIB8TION_TRIG_H
2#define __INC_LIB8TION_TRIG_H
3
4///@ingroup lib8tion
5
6///@defgroup Trig Fast trig functions
7/// Fast 8 and 16-bit approximations of sin(x) and cos(x).
8/// Don't use these approximations for calculating the
9/// trajectory of a rocket to Mars, but they're great
10/// for art projects and LED displays.
11///
12/// On Arduino/AVR, the 16-bit approximation is more than
13/// 10X faster than floating point sin(x) and cos(x), while
14/// the 8-bit approximation is more than 20X faster.
15///@{
16
// sin16 dispatches at compile time: sin16_avr when __AVR__ is defined,
// sin16_C otherwise.
17#if defined(__AVR__)
18#define sin16 sin16_avr
19#else
20#define sin16 sin16_C
21#endif
22
23/// Fast 16-bit approximation of sin(x). This approximation never varies more than
24/// 0.69% from the floating point value you'd get by doing
25///
26/// float s = sin(x) * 32767.0;
27///
28/// @param theta input angle from 0-65535
29/// @returns sin of theta, value between -32767 to 32767.
30LIB8STATIC int16_t sin16_avr( uint16_t theta )
31{
32 static const uint8_t data[] =
33 { 0, 0, 49, 0, 6393%256, 6393/256, 48, 0,
34 12539%256, 12539/256, 44, 0, 18204%256, 18204/256, 38, 0,
35 23170%256, 23170/256, 31, 0, 27245%256, 27245/256, 23, 0,
36 30273%256, 30273/256, 14, 0, 32137%256, 32137/256, 4 /*,0*/ };
37
38 uint16_t offset = (theta & 0x3FFF);
39
40 // AVR doesn't have a multi-bit shift instruction,
41 // so if we say "offset >>= 3", gcc makes a tiny loop.
42 // Inserting empty volatile statements between each
43 // bit shift forces gcc to unroll the loop.
44 offset >>= 1; // 0..8191
45 asm volatile("");
46 offset >>= 1; // 0..4095
47 asm volatile("");
48 offset >>= 1; // 0..2047
49
50 if( theta & 0x4000 ) offset = 2047 - offset;
51
52 uint8_t sectionX4;
53 sectionX4 = offset / 256;
54 sectionX4 *= 4;
55
56 uint8_t m;
57
58 union {
59 uint16_t b;
60 struct {
61 uint8_t blo;
62 uint8_t bhi;
63 };
64 } u;
65
66 //in effect u.b = blo + (256 * bhi);
67 u.blo = data[ sectionX4 ];
68 u.bhi = data[ sectionX4 + 1];
69 m = data[ sectionX4 + 2];
70
71 uint8_t secoffset8 = (uint8_t)(offset) / 2;
72
73 uint16_t mx = m * secoffset8;
74
75 int16_t y = mx + u.b;
76 if( theta & 0x8000 ) y = -y;
77
78 return y;
79}
80
81/// Fast 16-bit approximation of sin(x). This approximation never varies more than
82/// 0.69% from the floating point value you'd get by doing
83///
84/// float s = sin(x) * 32767.0;
85///
86/// @param theta input angle from 0-65535
87/// @returns sin of theta, value between -32767 to 32767.
88LIB8STATIC int16_t sin16_C( uint16_t theta )
89{
90 static const uint16_t base[] =
91 { 0, 6393, 12539, 18204, 23170, 27245, 30273, 32137 };
92 static const uint8_t slope[] =
93 { 49, 48, 44, 38, 31, 23, 14, 4 };
94
95 uint16_t offset = (theta & 0x3FFF) >> 3; // 0..2047
96 if( theta & 0x4000 ) offset = 2047 - offset;
97
98 uint8_t section = offset / 256; // 0..7
99 uint16_t b = base[section];
100 uint8_t m = slope[section];
101
102 uint8_t secoffset8 = (uint8_t)(offset) / 2;
103
104 uint16_t mx = m * secoffset8;
105 int16_t y = mx + b;
106
107 if( theta & 0x8000 ) y = -y;
108
109 return y;
110}
111
112
113/// Fast 16-bit approximation of cos(x). This approximation never varies more than
114/// 0.69% from the floating point value you'd get by doing
115///
116/// float s = cos(x) * 32767.0;
117///
118/// @param theta input angle from 0-65535
119/// @returns sin of theta, value between -32767 to 32767.
120LIB8STATIC int16_t cos16( uint16_t theta)
121{
122 return sin16( theta + 16384);
123}
124
125///////////////////////////////////////////////////////////////////////
126
127// sin8 & cos8
128// Fast 8-bit approximations of sin(x) & cos(x).
129// Input angle is an unsigned int from 0-255.
130// Output is an unsigned int from 0 to 255.
131//
132// This approximation can vary by up to 2%
133// from the floating point value you'd get by doing
134// float s = (sin( x ) * 128.0) + 128;
135//
136// Don't use this approximation for calculating the
137// "real" trigonometric calculations, but it's great
138// for art projects and LED displays.
139//
140// On Arduino/AVR, this approximation is more than
141// 20X faster than floating point sin(x) and cos(x)
142
// sin8 dispatches at compile time: sin8_avr when __AVR__ is defined and
// LIB8_ATTINY is not, sin8_C otherwise.
143#if defined(__AVR__) && !defined(LIB8_ATTINY)
144#define sin8 sin8_avr
145#else
146#define sin8 sin8_C
147#endif
148
149
/// Lookup table shared by sin8_avr and sin8_C: four interleaved
/// (base, slope x 16) pairs, one pair per 16-step section of the
/// quarter wave. Entry 2*section is the base value and entry
/// 2*section + 1 is the slope in sixteenths.
///
/// Declared static so that every translation unit including this header
/// gets its own private copy; a non-static definition here would be an
/// external definition in each includer and fail to link in C.
static const uint8_t b_m16_interleave[] = { 0, 49, 49, 41, 90, 27, 117, 10 };
151
152/// Fast 8-bit approximation of sin(x). This approximation never varies more than
153/// 2% from the floating point value you'd get by doing
154///
155/// float s = (sin(x) * 128.0) + 128;
156///
/// AVR variant: same table lookup and arithmetic as sin8_C, with the
/// conditional complement and the (m16 * secoffset) >> 4 step written
/// in inline assembly.
///
157/// @param theta input angle from 0-255
158/// @returns sin of theta, value between 0 and 255
159LIB8STATIC uint8_t sin8_avr( uint8_t theta)
160{
161 uint8_t offset = theta;
162
 // sbrc skips the following 'com' when bit 6 of theta is clear, so
 // offset is complemented only when bit 6 of theta is set.
163 asm volatile(
164 "sbrc %[theta],6 \n\t"
165 "com %[offset] \n\t"
166 : [theta] "+r" (theta), [offset] "+r" (offset)
167 );
168
169 offset &= 0x3F; // 0..63
170
171 uint8_t secoffset = offset & 0x0F; // 0..15
172 if( theta & 0x40) secoffset++;
173
174 uint8_t m16; uint8_t b;
175
176 uint8_t section = offset >> 4; // 0..3
177 uint8_t s2 = section * 2;
178
 // Fetch this section's (base, slope x 16) pair from the shared table.
179 const uint8_t* p = b_m16_interleave;
180 p += s2;
181 b = *p;
182 p++;
183 m16 = *p;
184
185 uint8_t mx;
186 uint8_t xr1;
 // mx = low 8 bits of (m16 * secoffset) >> 4: the 16-bit product lands
 // in r1:r0, then the high nibble of r0 and the low nibble of r1 are
 // merged with swap/andi/or. r1 is cleared again with 'eor r1, r1'.
 // andi needs an upper register, hence the "d" constraints.
 // NOTE(review): unlike the other mul-based asm blocks in this library,
 // this statement has no clobber list although it overwrites r0 and
 // r1; confirm that is intentional.
187 asm volatile(
188 "mul %[m16],%[secoffset] \n\t"
189 "mov %[mx],r0 \n\t"
190 "mov %[xr1],r1 \n\t"
191 "eor r1, r1 \n\t"
192 "swap %[mx] \n\t"
193 "andi %[mx],0x0F \n\t"
194 "swap %[xr1] \n\t"
195 "andi %[xr1], 0xF0 \n\t"
196 "or %[mx], %[xr1] \n\t"
197 : [mx] "=d" (mx), [xr1] "=d" (xr1)
198 : [m16] "d" (m16), [secoffset] "d" (secoffset)
199 );
200
 // Signed magnitude for the half wave; bit 7 of theta selects the
 // negative half, then the result is re-centred around 128.
201 int8_t y = mx + b;
202 if( theta & 0x80 ) y = -y;
203
204 y += 128;
205
206 return y;
207}
208
209
210/// Fast 8-bit approximation of sin(x). This approximation never varies more than
211/// 2% from the floating point value you'd get by doing
212///
213/// float s = (sin(x) * 128.0) + 128;
214///
215/// @param theta input angle from 0-255
216/// @returns sin of theta, value between 0 and 255
217LIB8STATIC uint8_t sin8_C( uint8_t theta)
218{
219 uint8_t offset = theta;
220 if( theta & 0x40 ) {
221 offset = (uint8_t)255 - offset;
222 }
223 offset &= 0x3F; // 0..63
224
225 uint8_t secoffset = offset & 0x0F; // 0..15
226 if( theta & 0x40) secoffset++;
227
228 uint8_t section = offset >> 4; // 0..3
229 uint8_t s2 = section * 2;
230 const uint8_t* p = b_m16_interleave;
231 p += s2;
232 uint8_t b = *p;
233 p++;
234 uint8_t m16 = *p;
235
236 uint8_t mx = (m16 * secoffset) >> 4;
237
238 int8_t y = mx + b;
239 if( theta & 0x80 ) y = -y;
240
241 y += 128;
242
243 return y;
244}
245
246/// Fast 8-bit approximation of cos(x). This approximation never varies more than
247/// 2% from the floating point value you'd get by doing
248///
249/// float s = (cos(x) * 128.0) + 128;
250///
251/// @param theta input angle from 0-255
252/// @returns sin of theta, value between 0 and 255
253LIB8STATIC uint8_t cos8( uint8_t theta)
254{
255 return sin8( theta + 64);
256}
257
258///@}
259#endif