diff options
Diffstat (limited to 'lib')
-rw-r--r-- | lib/lib8tion/LICENSE | 20 | ||||
-rw-r--r-- | lib/lib8tion/lib8tion.c | 242 | ||||
-rw-r--r-- | lib/lib8tion/lib8tion.h | 934 | ||||
-rw-r--r-- | lib/lib8tion/math8.h | 552 | ||||
-rw-r--r-- | lib/lib8tion/random8.h | 94 | ||||
-rw-r--r-- | lib/lib8tion/scale8.h | 542 | ||||
-rw-r--r-- | lib/lib8tion/trig8.h | 259 |
7 files changed, 2643 insertions, 0 deletions
diff --git a/lib/lib8tion/LICENSE b/lib/lib8tion/LICENSE new file mode 100644 index 000000000..ebe476330 --- /dev/null +++ b/lib/lib8tion/LICENSE | |||
@@ -0,0 +1,20 @@ | |||
1 | The MIT License (MIT) | ||
2 | |||
3 | Copyright (c) 2013 FastLED | ||
4 | |||
5 | Permission is hereby granted, free of charge, to any person obtaining a copy of | ||
6 | this software and associated documentation files (the "Software"), to deal in | ||
7 | the Software without restriction, including without limitation the rights to | ||
8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of | ||
9 | the Software, and to permit persons to whom the Software is furnished to do so, | ||
10 | subject to the following conditions: | ||
11 | |||
12 | The above copyright notice and this permission notice shall be included in all | ||
13 | copies or substantial portions of the Software. | ||
14 | |||
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS | ||
17 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR | ||
18 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER | ||
19 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
20 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | ||
diff --git a/lib/lib8tion/lib8tion.c b/lib/lib8tion/lib8tion.c new file mode 100644 index 000000000..84b3e9c61 --- /dev/null +++ b/lib/lib8tion/lib8tion.c | |||
@@ -0,0 +1,242 @@ | |||
1 | #define FASTLED_INTERNAL | ||
2 | #include <stdint.h> | ||
3 | |||
4 | #define RAND16_SEED 1337 | ||
5 | uint16_t rand16seed = RAND16_SEED; | ||
6 | |||
7 | |||
8 | // memset8, memcpy8, memmove8: | ||
9 | // optimized avr replacements for the standard "C" library | ||
10 | // routines memset, memcpy, and memmove. | ||
11 | // | ||
12 | // There are two techniques that make these routines | ||
13 | // faster than the standard avr-libc routines. | ||
14 | // First, the loops are unrolled 2X, meaning that | ||
15 | // the average loop overhead is cut in half. | ||
16 | // And second, the compare-and-branch at the bottom | ||
17 | // of each loop decrements the low byte of the | ||
18 | // counter, and if the carry is clear, it branches | ||
19 | // back up immediately. Only if the low byte math | ||
20 | // causes carry do we bother to decrement the high | ||
21 | // byte and check that result for carry as well. | ||
22 | // Results for a 100-byte buffer are 20-40% faster | ||
23 | // than standard avr-libc, at a cost of a few extra | ||
24 | // bytes of code. | ||
25 | |||
26 | #if defined(__AVR__) | ||
27 | //__attribute__ ((noinline)) | ||
28 | void * memset8 ( void * ptr, uint8_t val, uint16_t num ) | ||
29 | { | ||
30 | asm volatile( | ||
31 | " movw r26, %[ptr] \n\t" | ||
32 | " sbrs %A[num], 0 \n\t" | ||
33 | " rjmp Lseteven_%= \n\t" | ||
34 | " rjmp Lsetodd_%= \n\t" | ||
35 | "Lsetloop_%=: \n\t" | ||
36 | " st X+, %[val] \n\t" | ||
37 | "Lsetodd_%=: \n\t" | ||
38 | " st X+, %[val] \n\t" | ||
39 | "Lseteven_%=: \n\t" | ||
40 | " subi %A[num], 2 \n\t" | ||
41 | " brcc Lsetloop_%= \n\t" | ||
42 | " sbci %B[num], 0 \n\t" | ||
43 | " brcc Lsetloop_%= \n\t" | ||
44 | : [num] "+r" (num) | ||
45 | : [ptr] "r" (ptr), | ||
46 | [val] "r" (val) | ||
47 | : "memory" | ||
48 | ); | ||
49 | return ptr; | ||
50 | } | ||
51 | |||
52 | |||
53 | |||
54 | //__attribute__ ((noinline)) | ||
55 | void * memcpy8 ( void * dst, const void* src, uint16_t num ) | ||
56 | { | ||
57 | asm volatile( | ||
58 | " movw r30, %[src] \n\t" | ||
59 | " movw r26, %[dst] \n\t" | ||
60 | " sbrs %A[num], 0 \n\t" | ||
61 | " rjmp Lcpyeven_%= \n\t" | ||
62 | " rjmp Lcpyodd_%= \n\t" | ||
63 | "Lcpyloop_%=: \n\t" | ||
64 | " ld __tmp_reg__, Z+ \n\t" | ||
65 | " st X+, __tmp_reg__ \n\t" | ||
66 | "Lcpyodd_%=: \n\t" | ||
67 | " ld __tmp_reg__, Z+ \n\t" | ||
68 | " st X+, __tmp_reg__ \n\t" | ||
69 | "Lcpyeven_%=: \n\t" | ||
70 | " subi %A[num], 2 \n\t" | ||
71 | " brcc Lcpyloop_%= \n\t" | ||
72 | " sbci %B[num], 0 \n\t" | ||
73 | " brcc Lcpyloop_%= \n\t" | ||
74 | : [num] "+r" (num) | ||
75 | : [src] "r" (src), | ||
76 | [dst] "r" (dst) | ||
77 | : "memory" | ||
78 | ); | ||
79 | return dst; | ||
80 | } | ||
81 | |||
82 | //__attribute__ ((noinline)) | ||
83 | void * memmove8 ( void * dst, const void* src, uint16_t num ) | ||
84 | { | ||
85 | if( src > dst) { | ||
86 | // if src > dst then we can use the forward-stepping memcpy8 | ||
87 | return memcpy8( dst, src, num); | ||
88 | } else { | ||
89 | // if src < dst then we have to step backward: | ||
90 | dst = (char*)dst + num; | ||
91 | src = (char*)src + num; | ||
92 | asm volatile( | ||
93 | " movw r30, %[src] \n\t" | ||
94 | " movw r26, %[dst] \n\t" | ||
95 | " sbrs %A[num], 0 \n\t" | ||
96 | " rjmp Lmoveven_%= \n\t" | ||
97 | " rjmp Lmovodd_%= \n\t" | ||
98 | "Lmovloop_%=: \n\t" | ||
99 | " ld __tmp_reg__, -Z \n\t" | ||
100 | " st -X, __tmp_reg__ \n\t" | ||
101 | "Lmovodd_%=: \n\t" | ||
102 | " ld __tmp_reg__, -Z \n\t" | ||
103 | " st -X, __tmp_reg__ \n\t" | ||
104 | "Lmoveven_%=: \n\t" | ||
105 | " subi %A[num], 2 \n\t" | ||
106 | " brcc Lmovloop_%= \n\t" | ||
107 | " sbci %B[num], 0 \n\t" | ||
108 | " brcc Lmovloop_%= \n\t" | ||
109 | : [num] "+r" (num) | ||
110 | : [src] "r" (src), | ||
111 | [dst] "r" (dst) | ||
112 | : "memory" | ||
113 | ); | ||
114 | return dst; | ||
115 | } | ||
116 | } | ||
117 | |||
118 | #endif /* AVR */ | ||
119 | |||
120 | |||
121 | |||
122 | |||
123 | #if 0 | ||
124 | // TEST / VERIFICATION CODE ONLY BELOW THIS POINT | ||
125 | #include <Arduino.h> | ||
126 | #include "lib8tion.h" | ||
127 | |||
128 | void test1abs( int8_t i) | ||
129 | { | ||
130 | Serial.print("abs("); Serial.print(i); Serial.print(") = "); | ||
131 | int8_t j = abs8(i); | ||
132 | Serial.print(j); Serial.println(" "); | ||
133 | } | ||
134 | |||
135 | void testabs() | ||
136 | { | ||
137 | delay(5000); | ||
138 | for( int8_t q = -128; q != 127; q++) { | ||
139 | test1abs(q); | ||
140 | } | ||
141 | for(;;){}; | ||
142 | } | ||
143 | |||
144 | |||
145 | void testmul8() | ||
146 | { | ||
147 | delay(5000); | ||
148 | byte r, c; | ||
149 | |||
150 | Serial.println("mul8:"); | ||
151 | for( r = 0; r <= 20; r += 1) { | ||
152 | Serial.print(r); Serial.print(" : "); | ||
153 | for( c = 0; c <= 20; c += 1) { | ||
154 | byte t; | ||
155 | t = mul8( r, c); | ||
156 | Serial.print(t); Serial.print(' '); | ||
157 | } | ||
158 | Serial.println(' '); | ||
159 | } | ||
160 | Serial.println("done."); | ||
161 | for(;;){}; | ||
162 | } | ||
163 | |||
164 | |||
165 | void testscale8() | ||
166 | { | ||
167 | delay(5000); | ||
168 | byte r, c; | ||
169 | |||
170 | Serial.println("scale8:"); | ||
171 | for( r = 0; r <= 240; r += 10) { | ||
172 | Serial.print(r); Serial.print(" : "); | ||
173 | for( c = 0; c <= 240; c += 10) { | ||
174 | byte t; | ||
175 | t = scale8( r, c); | ||
176 | Serial.print(t); Serial.print(' '); | ||
177 | } | ||
178 | Serial.println(' '); | ||
179 | } | ||
180 | |||
181 | Serial.println(' '); | ||
182 | Serial.println("scale8_video:"); | ||
183 | |||
184 | for( r = 0; r <= 100; r += 4) { | ||
185 | Serial.print(r); Serial.print(" : "); | ||
186 | for( c = 0; c <= 100; c += 4) { | ||
187 | byte t; | ||
188 | t = scale8_video( r, c); | ||
189 | Serial.print(t); Serial.print(' '); | ||
190 | } | ||
191 | Serial.println(' '); | ||
192 | } | ||
193 | |||
194 | Serial.println("done."); | ||
195 | for(;;){}; | ||
196 | } | ||
197 | |||
198 | |||
199 | |||
200 | void testqadd8() | ||
201 | { | ||
202 | delay(5000); | ||
203 | byte r, c; | ||
204 | for( r = 0; r <= 240; r += 10) { | ||
205 | Serial.print(r); Serial.print(" : "); | ||
206 | for( c = 0; c <= 240; c += 10) { | ||
207 | byte t; | ||
208 | t = qadd8( r, c); | ||
209 | Serial.print(t); Serial.print(' '); | ||
210 | } | ||
211 | Serial.println(' '); | ||
212 | } | ||
213 | Serial.println("done."); | ||
214 | for(;;){}; | ||
215 | } | ||
216 | |||
217 | void testnscale8x3() | ||
218 | { | ||
219 | delay(5000); | ||
220 | byte r, g, b, sc; | ||
221 | for( byte z = 0; z < 10; z++) { | ||
222 | r = random8(); g = random8(); b = random8(); sc = random8(); | ||
223 | |||
224 | Serial.print("nscale8x3_video( "); | ||
225 | Serial.print(r); Serial.print(", "); | ||
226 | Serial.print(g); Serial.print(", "); | ||
227 | Serial.print(b); Serial.print(", "); | ||
228 | Serial.print(sc); Serial.print(") = [ "); | ||
229 | |||
230 | nscale8x3_video( r, g, b, sc); | ||
231 | |||
232 | Serial.print(r); Serial.print(", "); | ||
233 | Serial.print(g); Serial.print(", "); | ||
234 | Serial.print(b); Serial.print("]"); | ||
235 | |||
236 | Serial.println(' '); | ||
237 | } | ||
238 | Serial.println("done."); | ||
239 | for(;;){}; | ||
240 | } | ||
241 | |||
242 | #endif | ||
diff --git a/lib/lib8tion/lib8tion.h b/lib/lib8tion/lib8tion.h new file mode 100644 index 000000000..d93c748e6 --- /dev/null +++ b/lib/lib8tion/lib8tion.h | |||
@@ -0,0 +1,934 @@ | |||
1 | #ifndef __INC_LIB8TION_H | ||
2 | #define __INC_LIB8TION_H | ||
3 | |||
4 | /* | ||
5 | |||
6 | Fast, efficient 8-bit math functions specifically | ||
7 | designed for high-performance LED programming. | ||
8 | |||
9 | Because of the AVR(Arduino) and ARM assembly language | ||
10 | implementations provided, using these functions often | ||
11 | results in smaller and faster code than the equivalent | ||
12 | program using plain "C" arithmetic and logic. | ||
13 | |||
14 | |||
15 | Included are: | ||
16 | |||
17 | |||
18 | - Saturating unsigned 8-bit add and subtract. | ||
19 | Instead of wrapping around if an overflow occurs, | ||
20 | these routines just 'clamp' the output at a maximum | ||
21 | of 255, or a minimum of 0. Useful for adding pixel | ||
22 | values. E.g., qadd8( 200, 100) = 255. | ||
23 | |||
24 | qadd8( i, j) == MIN( (i + j), 0xFF ) | ||
25 | qsub8( i, j) == MAX( (i - j), 0 ) | ||
26 | |||
27 | - Saturating signed 8-bit ("7-bit") add. | ||
28 | qadd7( i, j) == MIN( (i + j), 0x7F) | ||
29 | |||
30 | |||
31 | - Scaling (down) of unsigned 8- and 16- bit values. | ||
32 | Scaledown value is specified in 1/256ths. | ||
33 | scale8( i, sc) == (i * sc) / 256 | ||
34 | scale16by8( i, sc) == (i * sc) / 256 | ||
35 | |||
36 | Example: scaling a 0-255 value down into a | ||
37 | range from 0-99: | ||
38 | downscaled = scale8( originalnumber, 100); | ||
39 | |||
40 | A special version of scale8 is provided for scaling | ||
41 | LED brightness values, to make sure that they don't | ||
42 | accidentally scale down to total black at low | ||
43 | dimming levels, since that would look wrong: | ||
44 | scale8_video( i, sc) = ((i * sc) / 256) +? 1 | ||
45 | |||
46 | Example: reducing an LED brightness by a | ||
47 | dimming factor: | ||
48 | new_bright = scale8_video( orig_bright, dimming); | ||
49 | |||
50 | |||
51 | - Fast 8- and 16- bit unsigned random numbers. | ||
52 | Significantly faster than Arduino random(), but | ||
53 | also somewhat less random. You can add entropy. | ||
54 | random8() == random from 0..255 | ||
55 | random8( n) == random from 0..(N-1) | ||
56 | random8( n, m) == random from N..(M-1) | ||
57 | |||
58 | random16() == random from 0..65535 | ||
59 | random16( n) == random from 0..(N-1) | ||
60 | random16( n, m) == random from N..(M-1) | ||
61 | |||
62 | random16_set_seed( k) == seed = k | ||
63 | random16_add_entropy( k) == seed += k | ||
64 | |||
65 | |||
66 | - Absolute value of a signed 8-bit value. | ||
67 | abs8( i) == abs( i) | ||
68 | |||
69 | |||
70 | - 8-bit math operations which return 8-bit values. | ||
71 | These are provided mostly for completeness, | ||
72 | not particularly for performance. | ||
73 | mul8( i, j) == (i * j) & 0xFF | ||
74 | add8( i, j) == (i + j) & 0xFF | ||
75 | sub8( i, j) == (i - j) & 0xFF | ||
76 | |||
77 | |||
78 | - Fast 16-bit approximations of sin and cos. | ||
79 | Input angle is a uint16_t from 0-65535. | ||
80 | Output is a signed int16_t from -32767 to 32767. | ||
81 | sin16( x) == sin( (x/32768.0) * pi) * 32767 | ||
82 | cos16( x) == cos( (x/32768.0) * pi) * 32767 | ||
83 | Accurate to more than 99% in all cases. | ||
84 | |||
85 | - Fast 8-bit approximations of sin and cos. | ||
86 | Input angle is a uint8_t from 0-255. | ||
87 | Output is an UNsigned uint8_t from 0 to 255. | ||
88 | sin8( x) == (sin( (x/128.0) * pi) * 128) + 128 | ||
89 | cos8( x) == (cos( (x/128.0) * pi) * 128) + 128 | ||
90 | Accurate to within about 2%. | ||
91 | |||
92 | |||
93 | - Fast 8-bit "easing in/out" function. | ||
94 | ease8InOutCubic(x) == 3(x^2) - 2(x^3) | ||
95 | ease8InOutApprox(x) == | ||
96 | faster, rougher, approximation of cubic easing | ||
97 | ease8InOutQuad(x) == quadratic (vs cubic) easing | ||
98 | |||
99 | - Cubic, Quadratic, and Triangle wave functions. | ||
100 | Input is a uint8_t representing phase within the wave, | ||
101 | similar to how sin8 takes an angle 'theta'. | ||
102 | Output is a uint8_t representing the amplitude of | ||
103 | the wave at that point. | ||
104 | cubicwave8( x) | ||
105 | quadwave8( x) | ||
106 | triwave8( x) | ||
107 | |||
108 | - Square root for 16-bit integers. About three times | ||
109 | faster and five times smaller than Arduino's built-in | ||
110 | generic 32-bit sqrt routine. | ||
111 | sqrt16( uint16_t x ) == sqrt( x) | ||
112 | |||
113 | - Dimming and brightening functions for 8-bit | ||
114 | light values. | ||
115 | dim8_video( x) == scale8_video( x, x) | ||
116 | dim8_raw( x) == scale8( x, x) | ||
117 | dim8_lin( x) == (x<128) ? ((x+1)/2) : scale8(x,x) | ||
118 | brighten8_video( x) == 255 - dim8_video( 255 - x) | ||
119 | brighten8_raw( x) == 255 - dim8_raw( 255 - x) | ||
120 | brighten8_lin( x) == 255 - dim8_lin( 255 - x) | ||
121 | The dimming functions in particular are suitable | ||
122 | for making LED light output appear more 'linear'. | ||
123 | |||
124 | |||
125 | - Linear interpolation between two values, with the | ||
126 | fraction between them expressed as an 8- or 16-bit | ||
127 | fixed point fraction (fract8 or fract16). | ||
128 | lerp8by8( fromU8, toU8, fract8 ) | ||
129 | lerp16by8( fromU16, toU16, fract8 ) | ||
130 | lerp15by8( fromS16, toS16, fract8 ) | ||
131 | == from + ((( to - from ) * fract8) / 256) | ||
132 | lerp16by16( fromU16, toU16, fract16 ) | ||
133 | == from + ((( to - from ) * fract16) / 65536) | ||
134 | map8( in, rangeStart, rangeEnd) | ||
135 | == map( in, 0, 255, rangeStart, rangeEnd); | ||
136 | |||
137 | - Optimized memmove, memcpy, and memset, that are | ||
138 | faster than standard avr-libc 1.8. | ||
139 | memmove8( dest, src, bytecount) | ||
140 | memcpy8( dest, src, bytecount) | ||
141 | memset8( buf, value, bytecount) | ||
142 | |||
143 | - Beat generators which return sine or sawtooth | ||
144 | waves in a specified number of Beats Per Minute. | ||
145 | Sine wave beat generators can specify a low and | ||
146 | high range for the output. Sawtooth wave beat | ||
147 | generators always range 0-255 or 0-65535. | ||
148 | beatsin8( BPM, low8, high8) | ||
149 | = (sine(beatphase) * (high8-low8)) + low8 | ||
150 | beatsin16( BPM, low16, high16) | ||
151 | = (sine(beatphase) * (high16-low16)) + low16 | ||
152 | beatsin88( BPM88, low16, high16) | ||
153 | = (sine(beatphase) * (high16-low16)) + low16 | ||
154 | beat8( BPM) = 8-bit repeating sawtooth wave | ||
155 | beat16( BPM) = 16-bit repeating sawtooth wave | ||
156 | beat88( BPM88) = 16-bit repeating sawtooth wave | ||
157 | BPM is beats per minute in either simple form | ||
158 | e.g. 120, or Q8.8 fixed-point form. | ||
159 | BPM88 is beats per minute in ONLY Q8.8 fixed-point | ||
160 | form. | ||
161 | |||
162 | Lib8tion is pronounced like 'libation': lie-BAY-shun | ||
163 | |||
164 | */ | ||
165 | |||
166 | |||
167 | |||
168 | #include <stdint.h> | ||
169 | |||
170 | #define LIB8STATIC __attribute__ ((unused)) static inline | ||
171 | #define LIB8STATIC_ALWAYS_INLINE __attribute__ ((always_inline)) static inline | ||
172 | |||
173 | #if !defined(__AVR__) | ||
174 | #include <string.h> | ||
175 | // for memmove, memcpy, and memset if not defined here | ||
176 | #endif | ||
177 | |||
178 | #if defined(__arm__) | ||
179 | |||
180 | #if defined(FASTLED_TEENSY3) | ||
181 | // Can use Cortex M4 DSP instructions | ||
182 | #define QADD8_C 0 | ||
183 | #define QADD7_C 0 | ||
184 | #define QADD8_ARM_DSP_ASM 1 | ||
185 | #define QADD7_ARM_DSP_ASM 1 | ||
186 | #else | ||
187 | // Generic ARM | ||
188 | #define QADD8_C 1 | ||
189 | #define QADD7_C 1 | ||
190 | #endif | ||
191 | |||
192 | #define QSUB8_C 1 | ||
193 | #define SCALE8_C 1 | ||
194 | #define SCALE16BY8_C 1 | ||
195 | #define SCALE16_C 1 | ||
196 | #define ABS8_C 1 | ||
197 | #define MUL8_C 1 | ||
198 | #define QMUL8_C 1 | ||
199 | #define ADD8_C 1 | ||
200 | #define SUB8_C 1 | ||
201 | #define EASE8_C 1 | ||
202 | #define AVG8_C 1 | ||
203 | #define AVG7_C 1 | ||
204 | #define AVG16_C 1 | ||
205 | #define AVG15_C 1 | ||
206 | #define BLEND8_C 1 | ||
207 | |||
208 | |||
209 | #elif defined(__AVR__) | ||
210 | |||
211 | // AVR ATmega and friends Arduino | ||
212 | |||
213 | #define QADD8_C 0 | ||
214 | #define QADD7_C 0 | ||
215 | #define QSUB8_C 0 | ||
216 | #define ABS8_C 0 | ||
217 | #define ADD8_C 0 | ||
218 | #define SUB8_C 0 | ||
219 | #define AVG8_C 0 | ||
220 | #define AVG7_C 0 | ||
221 | #define AVG16_C 0 | ||
222 | #define AVG15_C 0 | ||
223 | |||
224 | #define QADD8_AVRASM 1 | ||
225 | #define QADD7_AVRASM 1 | ||
226 | #define QSUB8_AVRASM 1 | ||
227 | #define ABS8_AVRASM 1 | ||
228 | #define ADD8_AVRASM 1 | ||
229 | #define SUB8_AVRASM 1 | ||
230 | #define AVG8_AVRASM 1 | ||
231 | #define AVG7_AVRASM 1 | ||
232 | #define AVG16_AVRASM 1 | ||
233 | #define AVG15_AVRASM 1 | ||
234 | |||
235 | // Note: these require hardware MUL instruction | ||
236 | // -- sorry, ATtiny! | ||
237 | #if !defined(LIB8_ATTINY) | ||
238 | #define SCALE8_C 0 | ||
239 | #define SCALE16BY8_C 0 | ||
240 | #define SCALE16_C 0 | ||
241 | #define MUL8_C 0 | ||
242 | #define QMUL8_C 0 | ||
243 | #define EASE8_C 0 | ||
244 | #define BLEND8_C 0 | ||
245 | #define SCALE8_AVRASM 1 | ||
246 | #define SCALE16BY8_AVRASM 1 | ||
247 | #define SCALE16_AVRASM 1 | ||
248 | #define MUL8_AVRASM 1 | ||
249 | #define QMUL8_AVRASM 1 | ||
250 | #define EASE8_AVRASM 1 | ||
251 | #define CLEANUP_R1_AVRASM 1 | ||
252 | #define BLEND8_AVRASM 1 | ||
253 | #else | ||
254 | // On ATtiny, we just use C implementations | ||
255 | #define SCALE8_C 1 | ||
256 | #define SCALE16BY8_C 1 | ||
257 | #define SCALE16_C 1 | ||
258 | #define MUL8_C 1 | ||
259 | #define QMUL8_C 1 | ||
260 | #define EASE8_C 1 | ||
261 | #define BLEND8_C 1 | ||
262 | #define SCALE8_AVRASM 0 | ||
263 | #define SCALE16BY8_AVRASM 0 | ||
264 | #define SCALE16_AVRASM 0 | ||
265 | #define MUL8_AVRASM 0 | ||
266 | #define QMUL8_AVRASM 0 | ||
267 | #define EASE8_AVRASM 0 | ||
268 | #define BLEND8_AVRASM 0 | ||
269 | #endif | ||
270 | |||
271 | #else | ||
272 | |||
273 | // unspecified architecture, so | ||
274 | // no ASM, everything in C | ||
275 | #define QADD8_C 1 | ||
276 | #define QADD7_C 1 | ||
277 | #define QSUB8_C 1 | ||
278 | #define SCALE8_C 1 | ||
279 | #define SCALE16BY8_C 1 | ||
280 | #define SCALE16_C 1 | ||
281 | #define ABS8_C 1 | ||
282 | #define MUL8_C 1 | ||
283 | #define QMUL8_C 1 | ||
284 | #define ADD8_C 1 | ||
285 | #define SUB8_C 1 | ||
286 | #define EASE8_C 1 | ||
287 | #define AVG8_C 1 | ||
288 | #define AVG7_C 1 | ||
289 | #define AVG16_C 1 | ||
290 | #define AVG15_C 1 | ||
291 | #define BLEND8_C 1 | ||
292 | |||
293 | #endif | ||
294 | |||
295 | ///@defgroup lib8tion Fast math functions | ||
296 | ///A variety of functions for working with numbers. | ||
297 | ///@{ | ||
298 | |||
299 | |||
300 | /////////////////////////////////////////////////////////////////////// | ||
301 | // | ||
302 | // typedefs for fixed-point fractional types. | ||
303 | // | ||
304 | // sfract7 should be interpreted as signed 128ths. | ||
305 | // fract8 should be interpreted as unsigned 256ths. | ||
306 | // sfract15 should be interpreted as signed 32768ths. | ||
307 | // fract16 should be interpreted as unsigned 65536ths. | ||
308 | // | ||
309 | // Example: if a fract8 has the value "64", that should be interpreted | ||
310 | // as 64/256ths, or one-quarter. | ||
311 | // | ||
312 | // | ||
313 | // fract8 range is 0 to 0.99609375 | ||
314 | // in steps of 0.00390625 | ||
315 | // | ||
316 | // sfract7 range is -0.9921875 to 0.9921875 | ||
317 | // in steps of 0.0078125 | ||
318 | // | ||
319 | // fract16 range is 0 to 0.99998474121 | ||
320 | // in steps of 0.00001525878 | ||
321 | // | ||
322 | // sfract15 range is -0.99996948242 to 0.99996948242 | ||
323 | // in steps of 0.00003051757 | ||
324 | // | ||
325 | |||
326 | /// ANSI unsigned short _Fract. range is 0 to 0.99609375 | ||
327 | /// in steps of 0.00390625 | ||
328 | typedef uint8_t fract8; ///< ANSI: unsigned short _Fract | ||
329 | |||
330 | /// ANSI: signed short _Fract. range is -0.9921875 to 0.9921875 | ||
331 | /// in steps of 0.0078125 | ||
332 | typedef int8_t sfract7; ///< ANSI: signed short _Fract | ||
333 | |||
334 | /// ANSI: unsigned _Fract. range is 0 to 0.99998474121 | ||
335 | /// in steps of 0.00001525878 | ||
336 | typedef uint16_t fract16; ///< ANSI: unsigned _Fract | ||
337 | |||
338 | /// ANSI: signed _Fract. range is -0.99996948242 to 0.99996948242 | ||
339 | /// in steps of 0.00003051757 | ||
340 | typedef int16_t sfract15; ///< ANSI: signed _Fract | ||
341 | |||
342 | |||
343 | // accumXY types should be interpreted as X bits of integer, | ||
344 | // and Y bits of fraction. | ||
345 | // E.g., accum88 has 8 bits of int, 8 bits of fraction | ||
346 | |||
347 | typedef uint16_t accum88; ///< ANSI: unsigned short _Accum. 8 bits int, 8 bits fraction | ||
348 | typedef int16_t saccum78; ///< ANSI: signed short _Accum. 7 bits int, 8 bits fraction | ||
349 | typedef uint32_t accum1616;///< ANSI: unsigned _Accum. 16 bits int, 16 bits fraction | ||
350 | typedef int32_t saccum1516;///< ANSI: signed _Accum. 15 bits int, 16 bits fraction | ||
351 | typedef uint16_t accum124; ///< no direct ANSI counterpart. 12 bits int, 4 bits fraction | ||
352 | typedef int32_t saccum114;///< no direct ANSI counterpart. 1 bit int, 14 bits fraction | ||
353 | |||
354 | |||
355 | |||
356 | #include "math8.h" | ||
357 | #include "scale8.h" | ||
358 | #include "random8.h" | ||
359 | #include "trig8.h" | ||
360 | |||
361 | /////////////////////////////////////////////////////////////////////// | ||
362 | |||
363 | |||
364 | |||
365 | |||
366 | |||
367 | |||
368 | |||
369 | /////////////////////////////////////////////////////////////////////// | ||
370 | // | ||
371 | // float-to-fixed and fixed-to-float conversions | ||
372 | // | ||
373 | // Note that anything involving a 'float' on AVR will be slower. | ||
374 | |||
375 | /// sfract15ToFloat: conversion from sfract15 fixed point to | ||
376 | /// IEEE754 32-bit float. | ||
377 | LIB8STATIC float sfract15ToFloat( sfract15 y) | ||
378 | { | ||
379 | return y / 32768.0; | ||
380 | } | ||
381 | |||
382 | /// conversion from IEEE754 float in the range (-1,1) | ||
383 | /// to 16-bit fixed point. Note that the extremes of | ||
384 | /// one and negative one are NOT representable. The | ||
385 | /// representable range is basically | ||
386 | LIB8STATIC sfract15 floatToSfract15( float f) | ||
387 | { | ||
388 | return f * 32768.0; | ||
389 | } | ||
390 | |||
391 | |||
392 | |||
393 | /////////////////////////////////////////////////////////////////////// | ||
394 | // | ||
395 | // memmove8, memcpy8, and memset8: | ||
396 | // alternatives to memmove, memcpy, and memset that are | ||
397 | // faster on AVR than standard avr-libc 1.8 | ||
398 | |||
399 | #if defined(__AVR__) | ||
400 | void * memmove8( void * dst, const void * src, uint16_t num ); | ||
401 | void * memcpy8 ( void * dst, const void * src, uint16_t num ) __attribute__ ((noinline)); | ||
402 | void * memset8 ( void * ptr, uint8_t value, uint16_t num ) __attribute__ ((noinline)) ; | ||
403 | #else | ||
404 | // on non-AVR platforms, these names just call standard libc. | ||
405 | #define memmove8 memmove | ||
406 | #define memcpy8 memcpy | ||
407 | #define memset8 memset | ||
408 | #endif | ||
409 | |||
410 | |||
411 | /////////////////////////////////////////////////////////////////////// | ||
412 | // | ||
413 | // linear interpolation, such as could be used for Perlin noise, etc. | ||
414 | // | ||
415 | |||
416 | // A note on the structure of the lerp functions: | ||
417 | // The cases for b>a and b<=a are handled separately for | ||
418 | // speed: without knowing the relative order of a and b, | ||
419 | // the value (a-b) might overflow the width of a or b, | ||
420 | // and have to be promoted to a wider, slower type. | ||
421 | // To avoid that, we separate the two cases, and are able | ||
422 | // to do all the math in the same width as the arguments, | ||
423 | // which is much faster and smaller on AVR. | ||
424 | |||
425 | /// linear interpolation between two unsigned 8-bit values, | ||
426 | /// with 8-bit fraction | ||
427 | LIB8STATIC uint8_t lerp8by8( uint8_t a, uint8_t b, fract8 frac) | ||
428 | { | ||
429 | uint8_t result; | ||
430 | if( b > a) { | ||
431 | uint8_t delta = b - a; | ||
432 | uint8_t scaled = scale8( delta, frac); | ||
433 | result = a + scaled; | ||
434 | } else { | ||
435 | uint8_t delta = a - b; | ||
436 | uint8_t scaled = scale8( delta, frac); | ||
437 | result = a - scaled; | ||
438 | } | ||
439 | return result; | ||
440 | } | ||
441 | |||
442 | /// linear interpolation between two unsigned 16-bit values, | ||
443 | /// with 16-bit fraction | ||
444 | LIB8STATIC uint16_t lerp16by16( uint16_t a, uint16_t b, fract16 frac) | ||
445 | { | ||
446 | uint16_t result; | ||
447 | if( b > a ) { | ||
448 | uint16_t delta = b - a; | ||
449 | uint16_t scaled = scale16(delta, frac); | ||
450 | result = a + scaled; | ||
451 | } else { | ||
452 | uint16_t delta = a - b; | ||
453 | uint16_t scaled = scale16( delta, frac); | ||
454 | result = a - scaled; | ||
455 | } | ||
456 | return result; | ||
457 | } | ||
458 | |||
459 | /// linear interpolation between two unsigned 16-bit values, | ||
460 | /// with 8-bit fraction | ||
461 | LIB8STATIC uint16_t lerp16by8( uint16_t a, uint16_t b, fract8 frac) | ||
462 | { | ||
463 | uint16_t result; | ||
464 | if( b > a) { | ||
465 | uint16_t delta = b - a; | ||
466 | uint16_t scaled = scale16by8( delta, frac); | ||
467 | result = a + scaled; | ||
468 | } else { | ||
469 | uint16_t delta = a - b; | ||
470 | uint16_t scaled = scale16by8( delta, frac); | ||
471 | result = a - scaled; | ||
472 | } | ||
473 | return result; | ||
474 | } | ||
475 | |||
476 | /// linear interpolation between two signed 15-bit values, | ||
477 | /// with 8-bit fraction | ||
478 | LIB8STATIC int16_t lerp15by8( int16_t a, int16_t b, fract8 frac) | ||
479 | { | ||
480 | int16_t result; | ||
481 | if( b > a) { | ||
482 | uint16_t delta = b - a; | ||
483 | uint16_t scaled = scale16by8( delta, frac); | ||
484 | result = a + scaled; | ||
485 | } else { | ||
486 | uint16_t delta = a - b; | ||
487 | uint16_t scaled = scale16by8( delta, frac); | ||
488 | result = a - scaled; | ||
489 | } | ||
490 | return result; | ||
491 | } | ||
492 | |||
493 | /// linear interpolation between two signed 15-bit values, | ||
494 | /// with 8-bit fraction | ||
495 | LIB8STATIC int16_t lerp15by16( int16_t a, int16_t b, fract16 frac) | ||
496 | { | ||
497 | int16_t result; | ||
498 | if( b > a) { | ||
499 | uint16_t delta = b - a; | ||
500 | uint16_t scaled = scale16( delta, frac); | ||
501 | result = a + scaled; | ||
502 | } else { | ||
503 | uint16_t delta = a - b; | ||
504 | uint16_t scaled = scale16( delta, frac); | ||
505 | result = a - scaled; | ||
506 | } | ||
507 | return result; | ||
508 | } | ||
509 | |||
510 | /// map8: map from one full-range 8-bit value into a narrower | ||
511 | /// range of 8-bit values, possibly a range of hues. | ||
512 | /// | ||
513 | /// E.g. map myValue into a hue in the range blue..purple..pink..red | ||
514 | /// hue = map8( myValue, HUE_BLUE, HUE_RED); | ||
515 | /// | ||
516 | /// Combines nicely with the waveform functions (like sin8, etc) | ||
517 | /// to produce continuous hue gradients back and forth: | ||
518 | /// | ||
519 | /// hue = map8( sin8( myValue), HUE_BLUE, HUE_RED); | ||
520 | /// | ||
521 | /// Mathematically simiar to lerp8by8, but arguments are more | ||
522 | /// like Arduino's "map"; this function is similar to | ||
523 | /// | ||
524 | /// map( in, 0, 255, rangeStart, rangeEnd) | ||
525 | /// | ||
526 | /// but faster and specifically designed for 8-bit values. | ||
527 | LIB8STATIC uint8_t map8( uint8_t in, uint8_t rangeStart, uint8_t rangeEnd) | ||
528 | { | ||
529 | uint8_t rangeWidth = rangeEnd - rangeStart; | ||
530 | uint8_t out = scale8( in, rangeWidth); | ||
531 | out += rangeStart; | ||
532 | return out; | ||
533 | } | ||
534 | |||
535 | |||
536 | /////////////////////////////////////////////////////////////////////// | ||
537 | // | ||
538 | // easing functions; see http://easings.net | ||
539 | // | ||
540 | |||
541 | /// ease8InOutQuad: 8-bit quadratic ease-in / ease-out function | ||
542 | /// Takes around 13 cycles on AVR | ||
543 | #if EASE8_C == 1 | ||
544 | LIB8STATIC uint8_t ease8InOutQuad( uint8_t i) | ||
545 | { | ||
546 | uint8_t j = i; | ||
547 | if( j & 0x80 ) { | ||
548 | j = 255 - j; | ||
549 | } | ||
550 | uint8_t jj = scale8( j, j); | ||
551 | uint8_t jj2 = jj << 1; | ||
552 | if( i & 0x80 ) { | ||
553 | jj2 = 255 - jj2; | ||
554 | } | ||
555 | return jj2; | ||
556 | } | ||
557 | |||
558 | #elif EASE8_AVRASM == 1 | ||
559 | // This AVR asm version of ease8InOutQuad preserves one more | ||
560 | // low-bit of precision than the C version, and is also slightly | ||
561 | // smaller and faster. | ||
562 | LIB8STATIC uint8_t ease8InOutQuad(uint8_t val) { | ||
563 | uint8_t j=val; | ||
564 | asm volatile ( | ||
565 | "sbrc %[val], 7 \n" | ||
566 | "com %[j] \n" | ||
567 | "mul %[j], %[j] \n" | ||
568 | "add r0, %[j] \n" | ||
569 | "ldi %[j], 0 \n" | ||
570 | "adc %[j], r1 \n" | ||
571 | "lsl r0 \n" // carry = high bit of low byte of mul product | ||
572 | "rol %[j] \n" // j = (j * 2) + carry // preserve add'l bit of precision | ||
573 | "sbrc %[val], 7 \n" | ||
574 | "com %[j] \n" | ||
575 | "clr __zero_reg__ \n" | ||
576 | : [j] "+&a" (j) | ||
577 | : [val] "a" (val) | ||
578 | : "r0", "r1" | ||
579 | ); | ||
580 | return j; | ||
581 | } | ||
582 | |||
583 | #else | ||
584 | #error "No implementation for ease8InOutQuad available." | ||
585 | #endif | ||
586 | |||
587 | /// ease16InOutQuad: 16-bit quadratic ease-in / ease-out function | ||
588 | // C implementation at this point | ||
589 | LIB8STATIC uint16_t ease16InOutQuad( uint16_t i) | ||
590 | { | ||
591 | uint16_t j = i; | ||
592 | if( j & 0x8000 ) { | ||
593 | j = 65535 - j; | ||
594 | } | ||
595 | uint16_t jj = scale16( j, j); | ||
596 | uint16_t jj2 = jj << 1; | ||
597 | if( i & 0x8000 ) { | ||
598 | jj2 = 65535 - jj2; | ||
599 | } | ||
600 | return jj2; | ||
601 | } | ||
602 | |||
603 | |||
604 | /// ease8InOutCubic: 8-bit cubic ease-in / ease-out function | ||
605 | /// Takes around 18 cycles on AVR | ||
606 | LIB8STATIC fract8 ease8InOutCubic( fract8 i) | ||
607 | { | ||
608 | uint8_t ii = scale8_LEAVING_R1_DIRTY( i, i); | ||
609 | uint8_t iii = scale8_LEAVING_R1_DIRTY( ii, i); | ||
610 | |||
611 | uint16_t r1 = (3 * (uint16_t)(ii)) - ( 2 * (uint16_t)(iii)); | ||
612 | |||
613 | /* the code generated for the above *'s automatically | ||
614 | cleans up R1, so there's no need to explicitily call | ||
615 | cleanup_R1(); */ | ||
616 | |||
617 | uint8_t result = r1; | ||
618 | |||
619 | // if we got "256", return 255: | ||
620 | if( r1 & 0x100 ) { | ||
621 | result = 255; | ||
622 | } | ||
623 | return result; | ||
624 | } | ||
625 | |||
626 | /// ease8InOutApprox: fast, rough 8-bit ease-in/ease-out function | ||
627 | /// shaped approximately like 'ease8InOutCubic', | ||
628 | /// it's never off by more than a couple of percent | ||
629 | /// from the actual cubic S-curve, and it executes | ||
630 | /// more than twice as fast. Use when the cycles | ||
631 | /// are more important than visual smoothness. | ||
632 | /// Asm version takes around 7 cycles on AVR. | ||
633 | |||
634 | #if EASE8_C == 1 | ||
635 | LIB8STATIC fract8 ease8InOutApprox( fract8 i) | ||
636 | { | ||
637 | if( i < 64) { | ||
638 | // start with slope 0.5 | ||
639 | i /= 2; | ||
640 | } else if( i > (255 - 64)) { | ||
641 | // end with slope 0.5 | ||
642 | i = 255 - i; | ||
643 | i /= 2; | ||
644 | i = 255 - i; | ||
645 | } else { | ||
646 | // in the middle, use slope 192/128 = 1.5 | ||
647 | i -= 64; | ||
648 | i += (i / 2); | ||
649 | i += 32; | ||
650 | } | ||
651 | |||
652 | return i; | ||
653 | } | ||
654 | |||
655 | #elif EASE8_AVRASM == 1 | ||
656 | LIB8STATIC uint8_t ease8InOutApprox( fract8 i) | ||
657 | { | ||
658 | // takes around 7 cycles on AVR | ||
659 | asm volatile ( | ||
660 | " subi %[i], 64 \n\t" | ||
661 | " cpi %[i], 128 \n\t" | ||
662 | " brcc Lshift_%= \n\t" | ||
663 | |||
664 | // middle case | ||
665 | " mov __tmp_reg__, %[i] \n\t" | ||
666 | " lsr __tmp_reg__ \n\t" | ||
667 | " add %[i], __tmp_reg__ \n\t" | ||
668 | " subi %[i], 224 \n\t" | ||
669 | " rjmp Ldone_%= \n\t" | ||
670 | |||
671 | // start or end case | ||
672 | "Lshift_%=: \n\t" | ||
673 | " lsr %[i] \n\t" | ||
674 | " subi %[i], 96 \n\t" | ||
675 | |||
676 | "Ldone_%=: \n\t" | ||
677 | |||
678 | : [i] "+&a" (i) | ||
679 | : | ||
680 | : "r0", "r1" | ||
681 | ); | ||
682 | return i; | ||
683 | } | ||
684 | #else | ||
685 | #error "No implementation for ease8 available." | ||
686 | #endif | ||
687 | |||
688 | |||
689 | |||
690 | /// triwave8: triangle (sawtooth) wave generator. Useful for | ||
691 | /// turning a one-byte ever-increasing value into a | ||
692 | /// one-byte value that oscillates up and down. | ||
693 | /// | ||
694 | /// input output | ||
695 | /// 0..127 0..254 (positive slope) | ||
696 | /// 128..255 254..0 (negative slope) | ||
697 | /// | ||
698 | /// On AVR this function takes just three cycles. | ||
699 | /// | ||
700 | LIB8STATIC uint8_t triwave8(uint8_t in) | ||
701 | { | ||
702 | if( in & 0x80) { | ||
703 | in = 255 - in; | ||
704 | } | ||
705 | uint8_t out = in << 1; | ||
706 | return out; | ||
707 | } | ||
708 | |||
709 | |||
710 | // quadwave8 and cubicwave8: S-shaped wave generators (like 'sine'). | ||
711 | // Useful for turning a one-byte 'counter' value into a | ||
712 | // one-byte oscillating value that moves smoothly up and down, | ||
713 | // with an 'acceleration' and 'deceleration' curve. | ||
714 | // | ||
715 | // These are even faster than 'sin8', and have | ||
716 | // slightly different curve shapes. | ||
717 | // | ||
718 | |||
719 | /// quadwave8: quadratic waveform generator. Spends just a little more | ||
720 | /// time at the limits than 'sine' does. | ||
721 | LIB8STATIC uint8_t quadwave8(uint8_t in) | ||
722 | { | ||
723 | return ease8InOutQuad( triwave8( in)); | ||
724 | } | ||
725 | |||
726 | /// cubicwave8: cubic waveform generator. Spends visibly more time | ||
727 | /// at the limits than 'sine' does. | ||
728 | LIB8STATIC uint8_t cubicwave8(uint8_t in) | ||
729 | { | ||
730 | return ease8InOutCubic( triwave8( in)); | ||
731 | } | ||
732 | |||
733 | /// squarewave8: square wave generator. Useful for | ||
734 | /// turning a one-byte ever-increasing value | ||
735 | /// into a one-byte value that is either 0 or 255. | ||
736 | /// The width of the output 'pulse' is | ||
737 | /// determined by the pulsewidth argument: | ||
738 | /// | ||
739 | ///~~~ | ||
740 | /// If pulsewidth is 255, output is always 255. | ||
741 | /// If pulsewidth < 255, then | ||
742 | /// if input < pulsewidth then output is 255 | ||
743 | /// if input >= pulsewidth then output is 0 | ||
744 | ///~~~ | ||
745 | /// | ||
746 | /// the output looking like: | ||
747 | /// | ||
748 | ///~~~ | ||
749 | /// 255 +--pulsewidth--+ | ||
750 | /// . | | | ||
751 | /// 0 0 +--------(256-pulsewidth)-------- | ||
752 | ///~~~ | ||
753 | /// | ||
754 | /// @param in | ||
755 | /// @param pulsewidth | ||
756 | /// @returns square wave output | ||
757 | LIB8STATIC uint8_t squarewave8( uint8_t in, uint8_t pulsewidth) | ||
758 | { | ||
759 | if( in < pulsewidth || (pulsewidth == 255)) { | ||
760 | return 255; | ||
761 | } else { | ||
762 | return 0; | ||
763 | } | ||
764 | } | ||
765 | |||
766 | |||
767 | // Beat generators - These functions produce waves at a given | ||
768 | // number of 'beats per minute'. Internally, they use | ||
769 | // the Arduino function 'millis' to track elapsed time. | ||
770 | // Accuracy is a bit better than one part in a thousand. | ||
771 | // | ||
772 | // beat8( BPM ) returns an 8-bit value that cycles 'BPM' times | ||
773 | // per minute, rising from 0 to 255, resetting to zero, | ||
774 | // rising up again, etc.. The output of this function | ||
775 | // is suitable for feeding directly into sin8, and cos8, | ||
776 | // triwave8, quadwave8, and cubicwave8. | ||
777 | // beat16( BPM ) returns a 16-bit value that cycles 'BPM' times | ||
778 | // per minute, rising from 0 to 65535, resetting to zero, | ||
779 | // rising up again, etc. The output of this function is | ||
780 | // suitable for feeding directly into sin16 and cos16. | ||
781 | // beat88( BPM88) is the same as beat16, except that the BPM88 argument | ||
782 | // MUST be in Q8.8 fixed point format, e.g. 120BPM must | ||
783 | // be specified as 120*256 = 30720. | ||
784 | // beatsin8( BPM, uint8_t low, uint8_t high) returns an 8-bit value that | ||
785 | // rises and falls in a sine wave, 'BPM' times per minute, | ||
786 | // between the values of 'low' and 'high'. | ||
787 | // beatsin16( BPM, uint16_t low, uint16_t high) returns a 16-bit value | ||
788 | // that rises and falls in a sine wave, 'BPM' times per | ||
789 | // minute, between the values of 'low' and 'high'. | ||
790 | // beatsin88( BPM88, ...) is the same as beatsin16, except that the | ||
791 | // BPM88 argument MUST be in Q8.8 fixed point format, | ||
792 | // e.g. 120BPM must be specified as 120*256 = 30720. | ||
793 | // | ||
794 | // BPM can be supplied two ways. The simpler way of specifying BPM is as | ||
795 | // a simple 8-bit integer from 1-255, (e.g., "120"). | ||
796 | // The more sophisticated way of specifying BPM allows for fractional | ||
797 | // "Q8.8" fixed point number (an 'accum88') with an 8-bit integer part and | ||
798 | // an 8-bit fractional part. The easiest way to construct this is to multiply | ||
799 | // a floating point BPM value (e.g. 120.3) by 256, (e.g. resulting in 30796 | ||
800 | // in this case), and pass that as the 16-bit BPM argument. | ||
801 | // "BPM88" MUST always be specified in Q8.8 format. | ||
802 | // | ||
803 | // Originally designed to make an entire animation project pulse with brightness. | ||
804 | // For that effect, add this line just above your existing call to "FastLED.show()": | ||
805 | // | ||
806 | // uint8_t bright = beatsin8( 60 /*BPM*/, 192 /*dimmest*/, 255 /*brightest*/ ); | ||
807 | // FastLED.setBrightness( bright ); | ||
808 | // FastLED.show(); | ||
809 | // | ||
810 | // The entire animation will now pulse between brightness 192 and 255 once per second. | ||
811 | |||
812 | |||
// The beat generators need access to a millisecond counter, reached
// through the GET_MILLIS macro.
//
// On Arduino-style platforms (ARDUINO, SPARK or FASTLED_HAS_MILLIS defined)
// this is "millis()". On other platforms, or when USE_GET_MILLISECOND_TIMER
// is #defined, you must provide a function with this signature:
//     uint32_t get_millisecond_timer();
// that provides similar functionality.
#if defined(USE_GET_MILLISECOND_TIMER) || !(defined(ARDUINO) || defined(SPARK) || defined(FASTLED_HAS_MILLIS))
// User-supplied millisecond timer.
uint32_t get_millisecond_timer(void);
#define GET_MILLIS get_millisecond_timer
#else
// Arduino function 'millis'; no forward declaration is made here
// (the original one is left commented out):
//uint32_t millis();
#define GET_MILLIS millis
#endif
828 | |||
829 | // beat16 generates a 16-bit 'sawtooth' wave at a given BPM, | ||
830 | /// with BPM specified in Q8.8 fixed-point format; e.g. | ||
831 | /// for this function, 120 BPM MUST BE specified as | ||
832 | /// 120*256 = 30720. | ||
833 | /// If you just want to specify "120", use beat16 or beat8. | ||
834 | LIB8STATIC uint16_t beat88( accum88 beats_per_minute_88, uint32_t timebase) | ||
835 | { | ||
836 | // BPM is 'beats per minute', or 'beats per 60000ms'. | ||
837 | // To avoid using the (slower) division operator, we | ||
838 | // want to convert 'beats per 60000ms' to 'beats per 65536ms', | ||
839 | // and then use a simple, fast bit-shift to divide by 65536. | ||
840 | // | ||
841 | // The ratio 65536:60000 is 279.620266667:256; we'll call it 280:256. | ||
842 | // The conversion is accurate to about 0.05%, more or less, | ||
843 | // e.g. if you ask for "120 BPM", you'll get about "119.93". | ||
844 | return (((GET_MILLIS()) - timebase) * beats_per_minute_88 * 280) >> 16; | ||
845 | } | ||
846 | |||
847 | /// beat16 generates a 16-bit 'sawtooth' wave at a given BPM | ||
848 | LIB8STATIC uint16_t beat16( accum88 beats_per_minute, uint32_t timebase) | ||
849 | { | ||
850 | // Convert simple 8-bit BPM's to full Q8.8 accum88's if needed | ||
851 | if( beats_per_minute < 256) beats_per_minute <<= 8; | ||
852 | return beat88(beats_per_minute, timebase); | ||
853 | } | ||
854 | |||
855 | /// beat8 generates an 8-bit 'sawtooth' wave at a given BPM | ||
856 | LIB8STATIC uint8_t beat8( accum88 beats_per_minute, uint32_t timebase) | ||
857 | { | ||
858 | return beat16( beats_per_minute, timebase) >> 8; | ||
859 | } | ||
860 | |||
861 | /// beatsin88 generates a 16-bit sine wave at a given BPM, | ||
862 | /// that oscillates within a given range. | ||
863 | /// For this function, BPM MUST BE SPECIFIED as | ||
864 | /// a Q8.8 fixed-point value; e.g. 120BPM must be | ||
865 | /// specified as 120*256 = 30720. | ||
866 | /// If you just want to specify "120", use beatsin16 or beatsin8. | ||
867 | LIB8STATIC uint16_t beatsin88( accum88 beats_per_minute_88, uint16_t lowest, uint16_t highest, uint32_t timebase, uint16_t phase_offset) | ||
868 | { | ||
869 | uint16_t beat = beat88( beats_per_minute_88, timebase); | ||
870 | uint16_t beatsin = (sin16( beat + phase_offset) + 32768); | ||
871 | uint16_t rangewidth = highest - lowest; | ||
872 | uint16_t scaledbeat = scale16( beatsin, rangewidth); | ||
873 | uint16_t result = lowest + scaledbeat; | ||
874 | return result; | ||
875 | } | ||
876 | |||
877 | /// beatsin16 generates a 16-bit sine wave at a given BPM, | ||
878 | /// that oscillates within a given range. | ||
879 | LIB8STATIC uint16_t beatsin16(accum88 beats_per_minute, uint16_t lowest, uint16_t highest, uint32_t timebase, uint16_t phase_offset) | ||
880 | { | ||
881 | uint16_t beat = beat16( beats_per_minute, timebase); | ||
882 | uint16_t beatsin = (sin16( beat + phase_offset) + 32768); | ||
883 | uint16_t rangewidth = highest - lowest; | ||
884 | uint16_t scaledbeat = scale16( beatsin, rangewidth); | ||
885 | uint16_t result = lowest + scaledbeat; | ||
886 | return result; | ||
887 | } | ||
888 | |||
889 | /// beatsin8 generates an 8-bit sine wave at a given BPM, | ||
890 | /// that oscillates within a given range. | ||
891 | LIB8STATIC uint8_t beatsin8( accum88 beats_per_minute, uint8_t lowest, uint8_t highest, uint32_t timebase, uint8_t phase_offset) | ||
892 | { | ||
893 | uint8_t beat = beat8( beats_per_minute, timebase); | ||
894 | uint8_t beatsin = sin8( beat + phase_offset); | ||
895 | uint8_t rangewidth = highest - lowest; | ||
896 | uint8_t scaledbeat = scale8( beatsin, rangewidth); | ||
897 | uint8_t result = lowest + scaledbeat; | ||
898 | return result; | ||
899 | } | ||
900 | |||
901 | |||
902 | /// Return the current seconds since boot in a 16-bit value. Used as part of the | ||
903 | /// "every N time-periods" mechanism | ||
904 | LIB8STATIC uint16_t seconds16(void) | ||
905 | { | ||
906 | uint32_t ms = GET_MILLIS(); | ||
907 | uint16_t s16; | ||
908 | s16 = ms / 1000; | ||
909 | return s16; | ||
910 | } | ||
911 | |||
912 | /// Return the current minutes since boot in a 16-bit value. Used as part of the | ||
913 | /// "every N time-periods" mechanism | ||
914 | LIB8STATIC uint16_t minutes16(void) | ||
915 | { | ||
916 | uint32_t ms = GET_MILLIS(); | ||
917 | uint16_t m16; | ||
918 | m16 = (ms / (60000L)) & 0xFFFF; | ||
919 | return m16; | ||
920 | } | ||
921 | |||
922 | /// Return the current hours since boot in an 8-bit value. Used as part of the | ||
923 | /// "every N time-periods" mechanism | ||
924 | LIB8STATIC uint8_t hours8(void) | ||
925 | { | ||
926 | uint32_t ms = GET_MILLIS(); | ||
927 | uint8_t h8; | ||
928 | h8 = (ms / (3600000L)) & 0xFF; | ||
929 | return h8; | ||
930 | } | ||
931 | |||
932 | ///@} | ||
933 | |||
934 | #endif | ||
diff --git a/lib/lib8tion/math8.h b/lib/lib8tion/math8.h new file mode 100644 index 000000000..8c6b6c227 --- /dev/null +++ b/lib/lib8tion/math8.h | |||
@@ -0,0 +1,552 @@ | |||
1 | #ifndef __INC_LIB8TION_MATH_H | ||
2 | #define __INC_LIB8TION_MATH_H | ||
3 | |||
4 | #include "scale8.h" | ||
5 | |||
6 | ///@ingroup lib8tion | ||
7 | |||
8 | ///@defgroup Math Basic math operations | ||
9 | /// Fast, efficient 8-bit math functions specifically | ||
10 | /// designed for high-performance LED programming. | ||
11 | /// | ||
12 | /// Because of the AVR(Arduino) and ARM assembly language | ||
13 | /// implementations provided, using these functions often | ||
14 | /// results in smaller and faster code than the equivalent | ||
15 | /// program using plain "C" arithmetic and logic. | ||
16 | ///@{ | ||
17 | |||
18 | |||
19 | /// add one byte to another, saturating at 0xFF | ||
20 | /// @param i - first byte to add | ||
21 | /// @param j - second byte to add | ||
22 | /// @returns the sum of i & j, capped at 0xFF | ||
23 | LIB8STATIC_ALWAYS_INLINE uint8_t qadd8( uint8_t i, uint8_t j) | ||
24 | { | ||
25 | #if QADD8_C == 1 | ||
26 | uint16_t t = i + j; | ||
27 | if (t > 255) t = 255; | ||
28 | return t; | ||
29 | #elif QADD8_AVRASM == 1 | ||
30 | asm volatile( | ||
31 | /* First, add j to i, conditioning the C flag */ | ||
32 | "add %0, %1 \n\t" | ||
33 | |||
34 | /* Now test the C flag. | ||
35 | If C is clear, we branch around a load of 0xFF into i. | ||
36 | If C is set, we go ahead and load 0xFF into i. | ||
37 | */ | ||
38 | "brcc L_%= \n\t" | ||
39 | "ldi %0, 0xFF \n\t" | ||
40 | "L_%=: " | ||
41 | : "+a" (i) | ||
42 | : "a" (j) ); | ||
43 | return i; | ||
44 | #elif QADD8_ARM_DSP_ASM == 1 | ||
45 | asm volatile( "uqadd8 %0, %0, %1" : "+r" (i) : "r" (j)); | ||
46 | return i; | ||
47 | #else | ||
48 | #error "No implementation for qadd8 available." | ||
49 | #endif | ||
50 | } | ||
51 | |||
52 | /// Add one byte to another, saturating at 0x7F | ||
53 | /// @param i - first byte to add | ||
54 | /// @param j - second byte to add | ||
55 | /// @returns the sum of i & j, capped at 0xFF | ||
56 | LIB8STATIC_ALWAYS_INLINE int8_t qadd7( int8_t i, int8_t j) | ||
57 | { | ||
58 | #if QADD7_C == 1 | ||
59 | int16_t t = i + j; | ||
60 | if (t > 127) t = 127; | ||
61 | return t; | ||
62 | #elif QADD7_AVRASM == 1 | ||
63 | asm volatile( | ||
64 | /* First, add j to i, conditioning the V flag */ | ||
65 | "add %0, %1 \n\t" | ||
66 | |||
67 | /* Now test the V flag. | ||
68 | If V is clear, we branch around a load of 0x7F into i. | ||
69 | If V is set, we go ahead and load 0x7F into i. | ||
70 | */ | ||
71 | "brvc L_%= \n\t" | ||
72 | "ldi %0, 0x7F \n\t" | ||
73 | "L_%=: " | ||
74 | : "+a" (i) | ||
75 | : "a" (j) ); | ||
76 | |||
77 | return i; | ||
78 | #elif QADD7_ARM_DSP_ASM == 1 | ||
79 | asm volatile( "qadd8 %0, %0, %1" : "+r" (i) : "r" (j)); | ||
80 | return i; | ||
81 | #else | ||
82 | #error "No implementation for qadd7 available." | ||
83 | #endif | ||
84 | } | ||
85 | |||
86 | /// subtract one byte from another, saturating at 0x00 | ||
87 | /// @returns i - j with a floor of 0 | ||
88 | LIB8STATIC_ALWAYS_INLINE uint8_t qsub8( uint8_t i, uint8_t j) | ||
89 | { | ||
90 | #if QSUB8_C == 1 | ||
91 | int16_t t = i - j; | ||
92 | if (t < 0) t = 0; | ||
93 | return t; | ||
94 | #elif QSUB8_AVRASM == 1 | ||
95 | |||
96 | asm volatile( | ||
97 | /* First, subtract j from i, conditioning the C flag */ | ||
98 | "sub %0, %1 \n\t" | ||
99 | |||
100 | /* Now test the C flag. | ||
101 | If C is clear, we branch around a load of 0x00 into i. | ||
102 | If C is set, we go ahead and load 0x00 into i. | ||
103 | */ | ||
104 | "brcc L_%= \n\t" | ||
105 | "ldi %0, 0x00 \n\t" | ||
106 | "L_%=: " | ||
107 | : "+a" (i) | ||
108 | : "a" (j) ); | ||
109 | |||
110 | return i; | ||
111 | #else | ||
112 | #error "No implementation for qsub8 available." | ||
113 | #endif | ||
114 | } | ||
115 | |||
116 | /// add one byte to another, with one byte result | ||
117 | LIB8STATIC_ALWAYS_INLINE uint8_t add8( uint8_t i, uint8_t j) | ||
118 | { | ||
119 | #if ADD8_C == 1 | ||
120 | uint16_t t = i + j; | ||
121 | return t; | ||
122 | #elif ADD8_AVRASM == 1 | ||
123 | // Add j to i, period. | ||
124 | asm volatile( "add %0, %1" : "+a" (i) : "a" (j)); | ||
125 | return i; | ||
126 | #else | ||
127 | #error "No implementation for add8 available." | ||
128 | #endif | ||
129 | } | ||
130 | |||
131 | /// add one byte to another, with one byte result | ||
132 | LIB8STATIC_ALWAYS_INLINE uint16_t add8to16( uint8_t i, uint16_t j) | ||
133 | { | ||
134 | #if ADD8_C == 1 | ||
135 | uint16_t t = i + j; | ||
136 | return t; | ||
137 | #elif ADD8_AVRASM == 1 | ||
138 | // Add i(one byte) to j(two bytes) | ||
139 | asm volatile( "add %A[j], %[i] \n\t" | ||
140 | "adc %B[j], __zero_reg__ \n\t" | ||
141 | : [j] "+a" (j) | ||
142 | : [i] "a" (i) | ||
143 | ); | ||
144 | return i; | ||
145 | #else | ||
146 | #error "No implementation for add8to16 available." | ||
147 | #endif | ||
148 | } | ||
149 | |||
150 | |||
151 | /// subtract one byte from another, 8-bit result | ||
152 | LIB8STATIC_ALWAYS_INLINE uint8_t sub8( uint8_t i, uint8_t j) | ||
153 | { | ||
154 | #if SUB8_C == 1 | ||
155 | int16_t t = i - j; | ||
156 | return t; | ||
157 | #elif SUB8_AVRASM == 1 | ||
158 | // Subtract j from i, period. | ||
159 | asm volatile( "sub %0, %1" : "+a" (i) : "a" (j)); | ||
160 | return i; | ||
161 | #else | ||
162 | #error "No implementation for sub8 available." | ||
163 | #endif | ||
164 | } | ||
165 | |||
166 | /// Calculate an integer average of two unsigned | ||
167 | /// 8-bit integer values (uint8_t). | ||
168 | /// Fractional results are rounded down, e.g. avg8(20,41) = 30 | ||
169 | LIB8STATIC_ALWAYS_INLINE uint8_t avg8( uint8_t i, uint8_t j) | ||
170 | { | ||
171 | #if AVG8_C == 1 | ||
172 | return (i + j) >> 1; | ||
173 | #elif AVG8_AVRASM == 1 | ||
174 | asm volatile( | ||
175 | /* First, add j to i, 9th bit overflows into C flag */ | ||
176 | "add %0, %1 \n\t" | ||
177 | /* Divide by two, moving C flag into high 8th bit */ | ||
178 | "ror %0 \n\t" | ||
179 | : "+a" (i) | ||
180 | : "a" (j) ); | ||
181 | return i; | ||
182 | #else | ||
183 | #error "No implementation for avg8 available." | ||
184 | #endif | ||
185 | } | ||
186 | |||
187 | /// Calculate an integer average of two unsigned | ||
188 | /// 16-bit integer values (uint16_t). | ||
189 | /// Fractional results are rounded down, e.g. avg16(20,41) = 30 | ||
190 | LIB8STATIC_ALWAYS_INLINE uint16_t avg16( uint16_t i, uint16_t j) | ||
191 | { | ||
192 | #if AVG16_C == 1 | ||
193 | return (uint32_t)((uint32_t)(i) + (uint32_t)(j)) >> 1; | ||
194 | #elif AVG16_AVRASM == 1 | ||
195 | asm volatile( | ||
196 | /* First, add jLo (heh) to iLo, 9th bit overflows into C flag */ | ||
197 | "add %A[i], %A[j] \n\t" | ||
198 | /* Now, add C + jHi to iHi, 17th bit overflows into C flag */ | ||
199 | "adc %B[i], %B[j] \n\t" | ||
200 | /* Divide iHi by two, moving C flag into high 16th bit, old 9th bit now in C */ | ||
201 | "ror %B[i] \n\t" | ||
202 | /* Divide iLo by two, moving C flag into high 8th bit */ | ||
203 | "ror %A[i] \n\t" | ||
204 | : [i] "+a" (i) | ||
205 | : [j] "a" (j) ); | ||
206 | return i; | ||
207 | #else | ||
208 | #error "No implementation for avg16 available." | ||
209 | #endif | ||
210 | } | ||
211 | |||
212 | |||
213 | /// Calculate an integer average of two signed 7-bit | ||
214 | /// integers (int8_t) | ||
215 | /// If the first argument is even, result is rounded down. | ||
216 | /// If the first argument is odd, result is result up. | ||
217 | LIB8STATIC_ALWAYS_INLINE int8_t avg7( int8_t i, int8_t j) | ||
218 | { | ||
219 | #if AVG7_C == 1 | ||
220 | return ((i + j) >> 1) + (i & 0x1); | ||
221 | #elif AVG7_AVRASM == 1 | ||
222 | asm volatile( | ||
223 | "asr %1 \n\t" | ||
224 | "asr %0 \n\t" | ||
225 | "adc %0, %1 \n\t" | ||
226 | : "+a" (i) | ||
227 | : "a" (j) ); | ||
228 | return i; | ||
229 | #else | ||
230 | #error "No implementation for avg7 available." | ||
231 | #endif | ||
232 | } | ||
233 | |||
234 | /// Calculate an integer average of two signed 15-bit | ||
235 | /// integers (int16_t) | ||
236 | /// If the first argument is even, result is rounded down. | ||
237 | /// If the first argument is odd, result is result up. | ||
238 | LIB8STATIC_ALWAYS_INLINE int16_t avg15( int16_t i, int16_t j) | ||
239 | { | ||
240 | #if AVG15_C == 1 | ||
241 | return ((int32_t)((int32_t)(i) + (int32_t)(j)) >> 1) + (i & 0x1); | ||
242 | #elif AVG15_AVRASM == 1 | ||
243 | asm volatile( | ||
244 | /* first divide j by 2, throwing away lowest bit */ | ||
245 | "asr %B[j] \n\t" | ||
246 | "ror %A[j] \n\t" | ||
247 | /* now divide i by 2, with lowest bit going into C */ | ||
248 | "asr %B[i] \n\t" | ||
249 | "ror %A[i] \n\t" | ||
250 | /* add j + C to i */ | ||
251 | "adc %A[i], %A[j] \n\t" | ||
252 | "adc %B[i], %B[j] \n\t" | ||
253 | : [i] "+a" (i) | ||
254 | : [j] "a" (j) ); | ||
255 | return i; | ||
256 | #else | ||
257 | #error "No implementation for avg15 available." | ||
258 | #endif | ||
259 | } | ||
260 | |||
261 | |||
262 | /// Calculate the remainder of one unsigned 8-bit | ||
263 | /// value divided by anoter, aka A % M. | ||
264 | /// Implemented by repeated subtraction, which is | ||
265 | /// very compact, and very fast if A is 'probably' | ||
266 | /// less than M. If A is a large multiple of M, | ||
267 | /// the loop has to execute multiple times. However, | ||
268 | /// even in that case, the loop is only two | ||
269 | /// instructions long on AVR, i.e., quick. | ||
270 | LIB8STATIC_ALWAYS_INLINE uint8_t mod8( uint8_t a, uint8_t m) | ||
271 | { | ||
272 | #if defined(__AVR__) | ||
273 | asm volatile ( | ||
274 | "L_%=: sub %[a],%[m] \n\t" | ||
275 | " brcc L_%= \n\t" | ||
276 | " add %[a],%[m] \n\t" | ||
277 | : [a] "+r" (a) | ||
278 | : [m] "r" (m) | ||
279 | ); | ||
280 | #else | ||
281 | while( a >= m) a -= m; | ||
282 | #endif | ||
283 | return a; | ||
284 | } | ||
285 | |||
286 | /// Add two numbers, and calculate the modulo | ||
287 | /// of the sum and a third number, M. | ||
288 | /// In other words, it returns (A+B) % M. | ||
289 | /// It is designed as a compact mechanism for | ||
290 | /// incrementing a 'mode' switch and wrapping | ||
291 | /// around back to 'mode 0' when the switch | ||
292 | /// goes past the end of the available range. | ||
293 | /// e.g. if you have seven modes, this switches | ||
294 | /// to the next one and wraps around if needed: | ||
295 | /// mode = addmod8( mode, 1, 7); | ||
296 | ///LIB8STATIC_ALWAYS_INLINESee 'mod8' for notes on performance. | ||
297 | LIB8STATIC uint8_t addmod8( uint8_t a, uint8_t b, uint8_t m) | ||
298 | { | ||
299 | #if defined(__AVR__) | ||
300 | asm volatile ( | ||
301 | " add %[a],%[b] \n\t" | ||
302 | "L_%=: sub %[a],%[m] \n\t" | ||
303 | " brcc L_%= \n\t" | ||
304 | " add %[a],%[m] \n\t" | ||
305 | : [a] "+r" (a) | ||
306 | : [b] "r" (b), [m] "r" (m) | ||
307 | ); | ||
308 | #else | ||
309 | a += b; | ||
310 | while( a >= m) a -= m; | ||
311 | #endif | ||
312 | return a; | ||
313 | } | ||
314 | |||
315 | /// Subtract two numbers, and calculate the modulo | ||
316 | /// of the difference and a third number, M. | ||
317 | /// In other words, it returns (A-B) % M. | ||
318 | /// It is designed as a compact mechanism for | ||
319 | /// incrementing a 'mode' switch and wrapping | ||
320 | /// around back to 'mode 0' when the switch | ||
321 | /// goes past the end of the available range. | ||
322 | /// e.g. if you have seven modes, this switches | ||
323 | /// to the next one and wraps around if needed: | ||
324 | /// mode = addmod8( mode, 1, 7); | ||
325 | ///LIB8STATIC_ALWAYS_INLINESee 'mod8' for notes on performance. | ||
326 | LIB8STATIC uint8_t submod8( uint8_t a, uint8_t b, uint8_t m) | ||
327 | { | ||
328 | #if defined(__AVR__) | ||
329 | asm volatile ( | ||
330 | " sub %[a],%[b] \n\t" | ||
331 | "L_%=: sub %[a],%[m] \n\t" | ||
332 | " brcc L_%= \n\t" | ||
333 | " add %[a],%[m] \n\t" | ||
334 | : [a] "+r" (a) | ||
335 | : [b] "r" (b), [m] "r" (m) | ||
336 | ); | ||
337 | #else | ||
338 | a -= b; | ||
339 | while( a >= m) a -= m; | ||
340 | #endif | ||
341 | return a; | ||
342 | } | ||
343 | |||
344 | /// 8x8 bit multiplication, with 8 bit result | ||
345 | LIB8STATIC_ALWAYS_INLINE uint8_t mul8( uint8_t i, uint8_t j) | ||
346 | { | ||
347 | #if MUL8_C == 1 | ||
348 | return ((uint16_t)i * (uint16_t)(j) ) & 0xFF; | ||
349 | #elif MUL8_AVRASM == 1 | ||
350 | asm volatile( | ||
351 | /* Multiply 8-bit i * 8-bit j, giving 16-bit r1,r0 */ | ||
352 | "mul %0, %1 \n\t" | ||
353 | /* Extract the LOW 8-bits (r0) */ | ||
354 | "mov %0, r0 \n\t" | ||
355 | /* Restore r1 to "0"; it's expected to always be that */ | ||
356 | "clr __zero_reg__ \n\t" | ||
357 | : "+a" (i) | ||
358 | : "a" (j) | ||
359 | : "r0", "r1"); | ||
360 | |||
361 | return i; | ||
362 | #else | ||
363 | #error "No implementation for mul8 available." | ||
364 | #endif | ||
365 | } | ||
366 | |||
367 | |||
368 | /// saturating 8x8 bit multiplication, with 8 bit result | ||
369 | /// @returns the product of i * j, capping at 0xFF | ||
370 | LIB8STATIC_ALWAYS_INLINE uint8_t qmul8( uint8_t i, uint8_t j) | ||
371 | { | ||
372 | #if QMUL8_C == 1 | ||
373 | int p = ((uint16_t)i * (uint16_t)(j) ); | ||
374 | if( p > 255) p = 255; | ||
375 | return p; | ||
376 | #elif QMUL8_AVRASM == 1 | ||
377 | asm volatile( | ||
378 | /* Multiply 8-bit i * 8-bit j, giving 16-bit r1,r0 */ | ||
379 | " mul %0, %1 \n\t" | ||
380 | /* If high byte of result is zero, all is well. */ | ||
381 | " tst r1 \n\t" | ||
382 | " breq Lnospill_%= \n\t" | ||
383 | /* If high byte of result > 0, saturate low byte to 0xFF */ | ||
384 | " ldi %0,0xFF \n\t" | ||
385 | " rjmp Ldone_%= \n\t" | ||
386 | "Lnospill_%=: \n\t" | ||
387 | /* Extract the LOW 8-bits (r0) */ | ||
388 | " mov %0, r0 \n\t" | ||
389 | "Ldone_%=: \n\t" | ||
390 | /* Restore r1 to "0"; it's expected to always be that */ | ||
391 | " clr __zero_reg__ \n\t" | ||
392 | : "+a" (i) | ||
393 | : "a" (j) | ||
394 | : "r0", "r1"); | ||
395 | |||
396 | return i; | ||
397 | #else | ||
398 | #error "No implementation for qmul8 available." | ||
399 | #endif | ||
400 | } | ||
401 | |||
402 | |||
403 | /// take abs() of a signed 8-bit uint8_t | ||
404 | LIB8STATIC_ALWAYS_INLINE int8_t abs8( int8_t i) | ||
405 | { | ||
406 | #if ABS8_C == 1 | ||
407 | if( i < 0) i = -i; | ||
408 | return i; | ||
409 | #elif ABS8_AVRASM == 1 | ||
410 | |||
411 | |||
412 | asm volatile( | ||
413 | /* First, check the high bit, and prepare to skip if it's clear */ | ||
414 | "sbrc %0, 7 \n" | ||
415 | |||
416 | /* Negate the value */ | ||
417 | "neg %0 \n" | ||
418 | |||
419 | : "+r" (i) : "r" (i) ); | ||
420 | return i; | ||
421 | #else | ||
422 | #error "No implementation for abs8 available." | ||
423 | #endif | ||
424 | } | ||
425 | |||
426 | /// square root for 16-bit integers | ||
427 | /// About three times faster and five times smaller | ||
428 | /// than Arduino's general sqrt on AVR. | ||
429 | LIB8STATIC uint8_t sqrt16(uint16_t x) | ||
430 | { | ||
431 | if( x <= 1) { | ||
432 | return x; | ||
433 | } | ||
434 | |||
435 | uint8_t low = 1; // lower bound | ||
436 | uint8_t hi, mid; | ||
437 | |||
438 | if( x > 7904) { | ||
439 | hi = 255; | ||
440 | } else { | ||
441 | hi = (x >> 5) + 8; // initial estimate for upper bound | ||
442 | } | ||
443 | |||
444 | do { | ||
445 | mid = (low + hi) >> 1; | ||
446 | if ((uint16_t)(mid * mid) > x) { | ||
447 | hi = mid - 1; | ||
448 | } else { | ||
449 | if( mid == 255) { | ||
450 | return 255; | ||
451 | } | ||
452 | low = mid + 1; | ||
453 | } | ||
454 | } while (hi >= low); | ||
455 | |||
456 | return low - 1; | ||
457 | } | ||
458 | |||
459 | /// blend a variable proportion (0-255) of one byte to another | ||
460 | /// @param a - the starting byte value | ||
461 | /// @param b - the byte value to blend toward | ||
462 | /// @param amountOfB - the proportion (0-255) of b to blend | ||
463 | /// @returns a byte value between a and b, inclusive | ||
464 | #if (FASTLED_BLEND_FIXED == 1) | ||
465 | LIB8STATIC uint8_t blend8( uint8_t a, uint8_t b, uint8_t amountOfB) | ||
466 | { | ||
467 | #if BLEND8_C == 1 | ||
468 | uint16_t partial; | ||
469 | uint8_t result; | ||
470 | |||
471 | uint8_t amountOfA = 255 - amountOfB; | ||
472 | |||
473 | partial = (a * amountOfA); | ||
474 | #if (FASTLED_SCALE8_FIXED == 1) | ||
475 | partial += a; | ||
476 | //partial = add8to16( a, partial); | ||
477 | #endif | ||
478 | |||
479 | partial += (b * amountOfB); | ||
480 | #if (FASTLED_SCALE8_FIXED == 1) | ||
481 | partial += b; | ||
482 | //partial = add8to16( b, partial); | ||
483 | #endif | ||
484 | |||
485 | result = partial >> 8; | ||
486 | |||
487 | return result; | ||
488 | |||
489 | #elif BLEND8_AVRASM == 1 | ||
490 | uint16_t partial; | ||
491 | uint8_t result; | ||
492 | |||
493 | asm volatile ( | ||
494 | /* partial = b * amountOfB */ | ||
495 | " mul %[b], %[amountOfB] \n\t" | ||
496 | " movw %A[partial], r0 \n\t" | ||
497 | |||
498 | /* amountOfB (aka amountOfA) = 255 - amountOfB */ | ||
499 | " com %[amountOfB] \n\t" | ||
500 | |||
501 | /* partial += a * amountOfB (aka amountOfA) */ | ||
502 | " mul %[a], %[amountOfB] \n\t" | ||
503 | |||
504 | " add %A[partial], r0 \n\t" | ||
505 | " adc %B[partial], r1 \n\t" | ||
506 | |||
507 | " clr __zero_reg__ \n\t" | ||
508 | |||
509 | #if (FASTLED_SCALE8_FIXED == 1) | ||
510 | /* partial += a */ | ||
511 | " add %A[partial], %[a] \n\t" | ||
512 | " adc %B[partial], __zero_reg__ \n\t" | ||
513 | |||
514 | // partial += b | ||
515 | " add %A[partial], %[b] \n\t" | ||
516 | " adc %B[partial], __zero_reg__ \n\t" | ||
517 | #endif | ||
518 | |||
519 | : [partial] "=r" (partial), | ||
520 | [amountOfB] "+a" (amountOfB) | ||
521 | : [a] "a" (a), | ||
522 | [b] "a" (b) | ||
523 | : "r0", "r1" | ||
524 | ); | ||
525 | |||
526 | result = partial >> 8; | ||
527 | |||
528 | return result; | ||
529 | |||
530 | #else | ||
531 | #error "No implementation for blend8 available." | ||
532 | #endif | ||
533 | } | ||
534 | |||
535 | #else | ||
536 | LIB8STATIC uint8_t blend8( uint8_t a, uint8_t b, uint8_t amountOfB) | ||
537 | { | ||
538 | // This version loses precision in the integer math | ||
539 | // and can actually return results outside of the range | ||
540 | // from a to b. Its use is not recommended. | ||
541 | uint8_t result; | ||
542 | uint8_t amountOfA = 255 - amountOfB; | ||
543 | result = scale8_LEAVING_R1_DIRTY( a, amountOfA) | ||
544 | + scale8_LEAVING_R1_DIRTY( b, amountOfB); | ||
545 | cleanup_R1(); | ||
546 | return result; | ||
547 | } | ||
548 | #endif | ||
549 | |||
550 | |||
551 | ///@} | ||
552 | #endif | ||
diff --git a/lib/lib8tion/random8.h b/lib/lib8tion/random8.h new file mode 100644 index 000000000..7ee67cbb3 --- /dev/null +++ b/lib/lib8tion/random8.h | |||
@@ -0,0 +1,94 @@ | |||
1 | #ifndef __INC_LIB8TION_RANDOM_H | ||
2 | #define __INC_LIB8TION_RANDOM_H | ||
3 | ///@ingroup lib8tion | ||
4 | |||
5 | ///@defgroup Random Fast random number generators | ||
6 | /// Fast 8- and 16- bit unsigned random numbers. | ||
7 | /// Significantly faster than Arduino random(), but | ||
8 | /// also somewhat less random. You can add entropy. | ||
9 | ///@{ | ||
10 | |||
// Generator step: X(n+1) = (2053 * X(n)) + 13849
#define FASTLED_RAND16_2053   ((uint16_t)2053)
#define FASTLED_RAND16_13849  ((uint16_t)13849)

/// Generator state (the seed). Only declared here; the definition lives
/// elsewhere (the original note suggests it is initialised to RAND16_SEED).
extern uint16_t rand16seed;
17 | |||
18 | /// Generate an 8-bit random number | ||
19 | LIB8STATIC uint8_t random8(void) | ||
20 | { | ||
21 | rand16seed = (rand16seed * FASTLED_RAND16_2053) + FASTLED_RAND16_13849; | ||
22 | // return the sum of the high and low bytes, for better | ||
23 | // mixing and non-sequential correlation | ||
24 | return (uint8_t)(((uint8_t)(rand16seed & 0xFF)) + | ||
25 | ((uint8_t)(rand16seed >> 8))); | ||
26 | } | ||
27 | |||
28 | /// Generate a 16 bit random number | ||
29 | LIB8STATIC uint16_t random16(void) | ||
30 | { | ||
31 | rand16seed = (rand16seed * FASTLED_RAND16_2053) + FASTLED_RAND16_13849; | ||
32 | return rand16seed; | ||
33 | } | ||
34 | |||
35 | /// Generate an 8-bit random number between 0 and lim | ||
36 | /// @param lim the upper bound for the result | ||
37 | LIB8STATIC uint8_t random8_max(uint8_t lim) | ||
38 | { | ||
39 | uint8_t r = random8(); | ||
40 | r = (r*lim) >> 8; | ||
41 | return r; | ||
42 | } | ||
43 | |||
44 | /// Generate an 8-bit random number in the given range | ||
45 | /// @param min the lower bound for the random number | ||
46 | /// @param lim the upper bound for the random number | ||
47 | LIB8STATIC uint8_t random8_min_max(uint8_t min, uint8_t lim) | ||
48 | { | ||
49 | uint8_t delta = lim - min; | ||
50 | uint8_t r = random8_max(delta) + min; | ||
51 | return r; | ||
52 | } | ||
53 | |||
54 | /// Generate an 16-bit random number between 0 and lim | ||
55 | /// @param lim the upper bound for the result | ||
56 | LIB8STATIC uint16_t random16_max(uint16_t lim) | ||
57 | { | ||
58 | uint16_t r = random16(); | ||
59 | uint32_t p = (uint32_t)lim * (uint32_t)r; | ||
60 | r = p >> 16; | ||
61 | return r; | ||
62 | } | ||
63 | |||
64 | /// Generate an 16-bit random number in the given range | ||
65 | /// @param min the lower bound for the random number | ||
66 | /// @param lim the upper bound for the random number | ||
67 | LIB8STATIC uint16_t random16_min_max( uint16_t min, uint16_t lim) | ||
68 | { | ||
69 | uint16_t delta = lim - min; | ||
70 | uint16_t r = random16_max(delta) + min; | ||
71 | return r; | ||
72 | } | ||
73 | |||
74 | /// Set the 16-bit seed used for the random number generator | ||
75 | LIB8STATIC void random16_set_seed(uint16_t seed) | ||
76 | { | ||
77 | rand16seed = seed; | ||
78 | } | ||
79 | |||
80 | /// Get the current seed value for the random number generator | ||
81 | LIB8STATIC uint16_t random16_get_seed(void) | ||
82 | { | ||
83 | return rand16seed; | ||
84 | } | ||
85 | |||
86 | /// Add entropy into the random number generator | ||
87 | LIB8STATIC void random16_add_entropy(uint16_t entropy) | ||
88 | { | ||
89 | rand16seed += entropy; | ||
90 | } | ||
91 | |||
92 | ///@} | ||
93 | |||
94 | #endif | ||
diff --git a/lib/lib8tion/scale8.h b/lib/lib8tion/scale8.h new file mode 100644 index 000000000..9895fd4d7 --- /dev/null +++ b/lib/lib8tion/scale8.h | |||
@@ -0,0 +1,542 @@ | |||
1 | #ifndef __INC_LIB8TION_SCALE_H | ||
2 | #define __INC_LIB8TION_SCALE_H | ||
3 | |||
4 | ///@ingroup lib8tion | ||
5 | |||
6 | ///@defgroup Scaling Scaling functions | ||
7 | /// Fast, efficient 8-bit scaling functions specifically | ||
8 | /// designed for high-performance LED programming. | ||
9 | /// | ||
10 | /// Because of the AVR(Arduino) and ARM assembly language | ||
11 | /// implementations provided, using these functions often | ||
12 | /// results in smaller and faster code than the equivalent | ||
13 | /// program using plain "C" arithmetic and logic. | ||
14 | ///@{ | ||
15 | |||
16 | /// scale one byte by a second one, which is treated as | ||
17 | /// the numerator of a fraction whose denominator is 256 | ||
18 | /// In other words, it computes i * (scale / 256) | ||
19 | /// 4 clocks AVR with MUL, 2 clocks ARM | ||
20 | LIB8STATIC_ALWAYS_INLINE uint8_t scale8( uint8_t i, fract8 scale) | ||
21 | { | ||
22 | #if SCALE8_C == 1 | ||
23 | #if (FASTLED_SCALE8_FIXED == 1) | ||
24 | return (((uint16_t)i) * (1+(uint16_t)(scale))) >> 8; | ||
25 | #else | ||
26 | return ((uint16_t)i * (uint16_t)(scale) ) >> 8; | ||
27 | #endif | ||
28 | #elif SCALE8_AVRASM == 1 | ||
29 | #if defined(LIB8_ATTINY) | ||
30 | #if (FASTLED_SCALE8_FIXED == 1) | ||
31 | uint8_t work=i; | ||
32 | #else | ||
33 | uint8_t work=0; | ||
34 | #endif | ||
35 | uint8_t cnt=0x80; | ||
36 | asm volatile( | ||
37 | #if (FASTLED_SCALE8_FIXED == 1) | ||
38 | " inc %[scale] \n\t" | ||
39 | " breq DONE_%= \n\t" | ||
40 | " clr %[work] \n\t" | ||
41 | #endif | ||
42 | "LOOP_%=: \n\t" | ||
43 | /*" sbrc %[scale], 0 \n\t" | ||
44 | " add %[work], %[i] \n\t" | ||
45 | " ror %[work] \n\t" | ||
46 | " lsr %[scale] \n\t" | ||
47 | " clc \n\t"*/ | ||
48 | " sbrc %[scale], 0 \n\t" | ||
49 | " add %[work], %[i] \n\t" | ||
50 | " ror %[work] \n\t" | ||
51 | " lsr %[scale] \n\t" | ||
52 | " lsr %[cnt] \n\t" | ||
53 | "brcc LOOP_%= \n\t" | ||
54 | "DONE_%=: \n\t" | ||
55 | : [work] "+r" (work), [cnt] "+r" (cnt) | ||
56 | : [scale] "r" (scale), [i] "r" (i) | ||
57 | : | ||
58 | ); | ||
59 | return work; | ||
60 | #else | ||
61 | asm volatile( | ||
62 | #if (FASTLED_SCALE8_FIXED==1) | ||
63 | // Multiply 8-bit i * 8-bit scale, giving 16-bit r1,r0 | ||
64 | "mul %0, %1 \n\t" | ||
65 | // Add i to r0, possibly setting the carry flag | ||
66 | "add r0, %0 \n\t" | ||
67 | // load the immediate 0 into i (note, this does _not_ touch any flags) | ||
68 | "ldi %0, 0x00 \n\t" | ||
69 | // walk and chew gum at the same time | ||
70 | "adc %0, r1 \n\t" | ||
71 | #else | ||
72 | /* Multiply 8-bit i * 8-bit scale, giving 16-bit r1,r0 */ | ||
73 | "mul %0, %1 \n\t" | ||
74 | /* Move the high 8-bits of the product (r1) back to i */ | ||
75 | "mov %0, r1 \n\t" | ||
76 | /* Restore r1 to "0"; it's expected to always be that */ | ||
77 | #endif | ||
78 | "clr __zero_reg__ \n\t" | ||
79 | |||
80 | : "+a" (i) /* writes to i */ | ||
81 | : "a" (scale) /* uses scale */ | ||
82 | : "r0", "r1" /* clobbers r0, r1 */ ); | ||
83 | |||
84 | /* Return the result */ | ||
85 | return i; | ||
86 | #endif | ||
87 | #else | ||
88 | #error "No implementation for scale8 available." | ||
89 | #endif | ||
90 | } | ||
91 | |||
92 | |||
93 | /// The "video" version of scale8 guarantees that the output will | ||
94 | /// be only be zero if one or both of the inputs are zero. If both | ||
95 | /// inputs are non-zero, the output is guaranteed to be non-zero. | ||
96 | /// This makes for better 'video'/LED dimming, at the cost of | ||
97 | /// several additional cycles. | ||
98 | LIB8STATIC_ALWAYS_INLINE uint8_t scale8_video( uint8_t i, fract8 scale) | ||
99 | { | ||
100 | #if SCALE8_C == 1 || defined(LIB8_ATTINY) | ||
101 | uint8_t j = (((int)i * (int)scale) >> 8) + ((i&&scale)?1:0); | ||
102 | // uint8_t nonzeroscale = (scale != 0) ? 1 : 0; | ||
103 | // uint8_t j = (i == 0) ? 0 : (((int)i * (int)(scale) ) >> 8) + nonzeroscale; | ||
104 | return j; | ||
105 | #elif SCALE8_AVRASM == 1 | ||
106 | uint8_t j=0; | ||
107 | asm volatile( | ||
108 | " tst %[i]\n\t" | ||
109 | " breq L_%=\n\t" | ||
110 | " mul %[i], %[scale]\n\t" | ||
111 | " mov %[j], r1\n\t" | ||
112 | " clr __zero_reg__\n\t" | ||
113 | " cpse %[scale], r1\n\t" | ||
114 | " subi %[j], 0xFF\n\t" | ||
115 | "L_%=: \n\t" | ||
116 | : [j] "+a" (j) | ||
117 | : [i] "a" (i), [scale] "a" (scale) | ||
118 | : "r0", "r1"); | ||
119 | |||
120 | return j; | ||
121 | // uint8_t nonzeroscale = (scale != 0) ? 1 : 0; | ||
122 | // asm volatile( | ||
123 | // " tst %0 \n" | ||
124 | // " breq L_%= \n" | ||
125 | // " mul %0, %1 \n" | ||
126 | // " mov %0, r1 \n" | ||
127 | // " add %0, %2 \n" | ||
128 | // " clr __zero_reg__ \n" | ||
129 | // "L_%=: \n" | ||
130 | |||
131 | // : "+a" (i) | ||
132 | // : "a" (scale), "a" (nonzeroscale) | ||
133 | // : "r0", "r1"); | ||
134 | |||
135 | // // Return the result | ||
136 | // return i; | ||
137 | #else | ||
138 | #error "No implementation for scale8_video available." | ||
139 | #endif | ||
140 | } | ||
141 | |||
142 | |||
143 | /// This version of scale8 does not clean up the R1 register on AVR | ||
144 | /// If you are doing several 'scale8's in a row, use this, and | ||
145 | /// then explicitly call cleanup_R1. | ||
146 | LIB8STATIC_ALWAYS_INLINE uint8_t scale8_LEAVING_R1_DIRTY( uint8_t i, fract8 scale) | ||
147 | { | ||
148 | #if SCALE8_C == 1 | ||
149 | #if (FASTLED_SCALE8_FIXED == 1) | ||
150 | return (((uint16_t)i) * ((uint16_t)(scale)+1)) >> 8; | ||
151 | #else | ||
152 | return ((int)i * (int)(scale) ) >> 8; | ||
153 | #endif | ||
154 | #elif SCALE8_AVRASM == 1 | ||
155 | asm volatile( | ||
156 | #if (FASTLED_SCALE8_FIXED==1) | ||
157 | // Multiply 8-bit i * 8-bit scale, giving 16-bit r1,r0 | ||
158 | "mul %0, %1 \n\t" | ||
159 | // Add i to r0, possibly setting the carry flag | ||
160 | "add r0, %0 \n\t" | ||
161 | // load the immediate 0 into i (note, this does _not_ touch any flags) | ||
162 | "ldi %0, 0x00 \n\t" | ||
163 | // walk and chew gum at the same time | ||
164 | "adc %0, r1 \n\t" | ||
165 | #else | ||
166 | /* Multiply 8-bit i * 8-bit scale, giving 16-bit r1,r0 */ | ||
167 | "mul %0, %1 \n\t" | ||
168 | /* Move the high 8-bits of the product (r1) back to i */ | ||
169 | "mov %0, r1 \n\t" | ||
170 | #endif | ||
171 | /* R1 IS LEFT DIRTY HERE; YOU MUST ZERO IT OUT YOURSELF */ | ||
172 | /* "clr __zero_reg__ \n\t" */ | ||
173 | |||
174 | : "+a" (i) /* writes to i */ | ||
175 | : "a" (scale) /* uses scale */ | ||
176 | : "r0", "r1" /* clobbers r0, r1 */ ); | ||
177 | |||
178 | // Return the result | ||
179 | return i; | ||
180 | #else | ||
181 | #error "No implementation for scale8_LEAVING_R1_DIRTY available." | ||
182 | #endif | ||
183 | } | ||
184 | |||
185 | |||
186 | /// This version of scale8_video does not clean up the R1 register on AVR | ||
187 | /// If you are doing several 'scale8_video's in a row, use this, and | ||
188 | /// then explicitly call cleanup_R1. | ||
189 | LIB8STATIC_ALWAYS_INLINE uint8_t scale8_video_LEAVING_R1_DIRTY( uint8_t i, fract8 scale) | ||
190 | { | ||
191 | #if SCALE8_C == 1 || defined(LIB8_ATTINY) | ||
192 | uint8_t j = (((int)i * (int)scale) >> 8) + ((i&&scale)?1:0); | ||
193 | // uint8_t nonzeroscale = (scale != 0) ? 1 : 0; | ||
194 | // uint8_t j = (i == 0) ? 0 : (((int)i * (int)(scale) ) >> 8) + nonzeroscale; | ||
195 | return j; | ||
196 | #elif SCALE8_AVRASM == 1 | ||
197 | uint8_t j=0; | ||
198 | asm volatile( | ||
199 | " tst %[i]\n\t" | ||
200 | " breq L_%=\n\t" | ||
201 | " mul %[i], %[scale]\n\t" | ||
202 | " mov %[j], r1\n\t" | ||
203 | " breq L_%=\n\t" | ||
204 | " subi %[j], 0xFF\n\t" | ||
205 | "L_%=: \n\t" | ||
206 | : [j] "+a" (j) | ||
207 | : [i] "a" (i), [scale] "a" (scale) | ||
208 | : "r0", "r1"); | ||
209 | |||
210 | return j; | ||
211 | // uint8_t nonzeroscale = (scale != 0) ? 1 : 0; | ||
212 | // asm volatile( | ||
213 | // " tst %0 \n" | ||
214 | // " breq L_%= \n" | ||
215 | // " mul %0, %1 \n" | ||
216 | // " mov %0, r1 \n" | ||
217 | // " add %0, %2 \n" | ||
218 | // " clr __zero_reg__ \n" | ||
219 | // "L_%=: \n" | ||
220 | |||
221 | // : "+a" (i) | ||
222 | // : "a" (scale), "a" (nonzeroscale) | ||
223 | // : "r0", "r1"); | ||
224 | |||
225 | // // Return the result | ||
226 | // return i; | ||
227 | #else | ||
228 | #error "No implementation for scale8_video_LEAVING_R1_DIRTY available." | ||
229 | #endif | ||
230 | } | ||
231 | |||
232 | /// Clean up the r1 register after a series of *LEAVING_R1_DIRTY calls | ||
233 | LIB8STATIC_ALWAYS_INLINE void cleanup_R1(void) | ||
234 | { | ||
235 | #if CLEANUP_R1_AVRASM == 1 | ||
236 | // Restore r1 to "0"; it's expected to always be that | ||
237 | asm volatile( "clr __zero_reg__ \n\t" : : : "r1" ); | ||
238 | #endif | ||
239 | } | ||
240 | |||
241 | |||
242 | /// scale a 16-bit unsigned value by an 8-bit value, | ||
243 | /// considered as numerator of a fraction whose denominator | ||
244 | /// is 256. In other words, it computes i * (scale / 256) | ||
245 | |||
246 | LIB8STATIC_ALWAYS_INLINE uint16_t scale16by8( uint16_t i, fract8 scale ) | ||
247 | { | ||
248 | #if SCALE16BY8_C == 1 | ||
249 | uint16_t result; | ||
250 | #if FASTLED_SCALE8_FIXED == 1 | ||
251 | result = (i * (1+((uint16_t)scale))) >> 8; | ||
252 | #else | ||
253 | result = (i * scale) / 256; | ||
254 | #endif | ||
255 | return result; | ||
256 | #elif SCALE16BY8_AVRASM == 1 | ||
257 | #if FASTLED_SCALE8_FIXED == 1 | ||
258 | uint16_t result = 0; | ||
259 | asm volatile( | ||
260 | // result.A = HighByte( (i.A x scale) + i.A ) | ||
261 | " mul %A[i], %[scale] \n\t" | ||
262 | " add r0, %A[i] \n\t" | ||
263 | // " adc r1, [zero] \n\t" | ||
264 | // " mov %A[result], r1 \n\t" | ||
265 | " adc %A[result], r1 \n\t" | ||
266 | |||
267 | // result.A-B += i.B x scale | ||
268 | " mul %B[i], %[scale] \n\t" | ||
269 | " add %A[result], r0 \n\t" | ||
270 | " adc %B[result], r1 \n\t" | ||
271 | |||
272 | // cleanup r1 | ||
273 | " clr __zero_reg__ \n\t" | ||
274 | |||
275 | // result.A-B += i.B | ||
276 | " add %A[result], %B[i] \n\t" | ||
277 | " adc %B[result], __zero_reg__ \n\t" | ||
278 | |||
279 | : [result] "+r" (result) | ||
280 | : [i] "r" (i), [scale] "r" (scale) | ||
281 | : "r0", "r1" | ||
282 | ); | ||
283 | return result; | ||
284 | #else | ||
285 | uint16_t result = 0; | ||
286 | asm volatile( | ||
287 | // result.A = HighByte(i.A x j ) | ||
288 | " mul %A[i], %[scale] \n\t" | ||
289 | " mov %A[result], r1 \n\t" | ||
290 | //" clr %B[result] \n\t" | ||
291 | |||
292 | // result.A-B += i.B x j | ||
293 | " mul %B[i], %[scale] \n\t" | ||
294 | " add %A[result], r0 \n\t" | ||
295 | " adc %B[result], r1 \n\t" | ||
296 | |||
297 | // cleanup r1 | ||
298 | " clr __zero_reg__ \n\t" | ||
299 | |||
300 | : [result] "+r" (result) | ||
301 | : [i] "r" (i), [scale] "r" (scale) | ||
302 | : "r0", "r1" | ||
303 | ); | ||
304 | return result; | ||
305 | #endif | ||
306 | #else | ||
307 | #error "No implementation for scale16by8 available." | ||
308 | #endif | ||
309 | } | ||
310 | |||
311 | /// scale a 16-bit unsigned value by a 16-bit value, | ||
312 | /// considered as numerator of a fraction whose denominator | ||
313 | /// is 65536. In other words, it computes i * (scale / 65536) | ||
314 | |||
315 | LIB8STATIC uint16_t scale16( uint16_t i, fract16 scale ) | ||
316 | { | ||
317 | #if SCALE16_C == 1 | ||
318 | uint16_t result; | ||
319 | #if FASTLED_SCALE8_FIXED == 1 | ||
320 | result = ((uint32_t)(i) * (1+(uint32_t)(scale))) / 65536; | ||
321 | #else | ||
322 | result = ((uint32_t)(i) * (uint32_t)(scale)) / 65536; | ||
323 | #endif | ||
324 | return result; | ||
325 | #elif SCALE16_AVRASM == 1 | ||
326 | #if FASTLED_SCALE8_FIXED == 1 | ||
327 | // implemented sort of like | ||
328 | // result = ((i * scale) + i ) / 65536 | ||
329 | // | ||
330 | // why not like this, you may ask? | ||
331 | // result = (i * (scale+1)) / 65536 | ||
332 | // the answer is that if scale is 65535, then scale+1 | ||
333 | // will be zero, which is not what we want. | ||
334 | uint32_t result; | ||
335 | asm volatile( | ||
336 | // result.A-B = i.A x scale.A | ||
337 | " mul %A[i], %A[scale] \n\t" | ||
338 | // save results... | ||
339 | // basic idea: | ||
340 | //" mov %A[result], r0 \n\t" | ||
341 | //" mov %B[result], r1 \n\t" | ||
342 | // which can be written as... | ||
343 | " movw %A[result], r0 \n\t" | ||
344 | // Because we're going to add i.A-B to | ||
345 | // result.A-D, we DO need to keep both | ||
346 | // the r0 and r1 portions of the product | ||
347 | // UNlike in the 'unfixed scale8' version. | ||
348 | // So the movw here is needed. | ||
349 | : [result] "=r" (result) | ||
350 | : [i] "r" (i), | ||
351 | [scale] "r" (scale) | ||
352 | : "r0", "r1" | ||
353 | ); | ||
354 | |||
355 | asm volatile( | ||
356 | // result.C-D = i.B x scale.B | ||
357 | " mul %B[i], %B[scale] \n\t" | ||
358 | //" mov %C[result], r0 \n\t" | ||
359 | //" mov %D[result], r1 \n\t" | ||
360 | " movw %C[result], r0 \n\t" | ||
361 | : [result] "+r" (result) | ||
362 | : [i] "r" (i), | ||
363 | [scale] "r" (scale) | ||
364 | : "r0", "r1" | ||
365 | ); | ||
366 | |||
367 | const uint8_t zero = 0; | ||
368 | asm volatile( | ||
369 | // result.B-D += i.B x scale.A | ||
370 | " mul %B[i], %A[scale] \n\t" | ||
371 | |||
372 | " add %B[result], r0 \n\t" | ||
373 | " adc %C[result], r1 \n\t" | ||
374 | " adc %D[result], %[zero] \n\t" | ||
375 | |||
376 | // result.B-D += i.A x scale.B | ||
377 | " mul %A[i], %B[scale] \n\t" | ||
378 | |||
379 | " add %B[result], r0 \n\t" | ||
380 | " adc %C[result], r1 \n\t" | ||
381 | " adc %D[result], %[zero] \n\t" | ||
382 | |||
383 | // cleanup r1 | ||
384 | " clr r1 \n\t" | ||
385 | |||
386 | : [result] "+r" (result) | ||
387 | : [i] "r" (i), | ||
388 | [scale] "r" (scale), | ||
389 | [zero] "r" (zero) | ||
390 | : "r0", "r1" | ||
391 | ); | ||
392 | |||
393 | asm volatile( | ||
394 | // result.A-D += i.A-B | ||
395 | " add %A[result], %A[i] \n\t" | ||
396 | " adc %B[result], %B[i] \n\t" | ||
397 | " adc %C[result], %[zero] \n\t" | ||
398 | " adc %D[result], %[zero] \n\t" | ||
399 | : [result] "+r" (result) | ||
400 | : [i] "r" (i), | ||
401 | [zero] "r" (zero) | ||
402 | ); | ||
403 | |||
404 | result = result >> 16; | ||
405 | return result; | ||
406 | #else | ||
407 | uint32_t result; | ||
408 | asm volatile( | ||
409 | // result.A-B = i.A x scale.A | ||
410 | " mul %A[i], %A[scale] \n\t" | ||
411 | // save results... | ||
412 | // basic idea: | ||
413 | //" mov %A[result], r0 \n\t" | ||
414 | //" mov %B[result], r1 \n\t" | ||
415 | // which can be written as... | ||
416 | " movw %A[result], r0 \n\t" | ||
417 | // We actually don't need to do anything with r0, | ||
418 | // as result.A is never used again here, so we | ||
419 | // could just move the high byte, but movw is | ||
420 | // one clock cycle, just like mov, so might as | ||
421 | // well, in case we want to use this code for | ||
422 | // a generic 16x16 multiply somewhere. | ||
423 | |||
424 | : [result] "=r" (result) | ||
425 | : [i] "r" (i), | ||
426 | [scale] "r" (scale) | ||
427 | : "r0", "r1" | ||
428 | ); | ||
429 | |||
430 | asm volatile( | ||
431 | // result.C-D = i.B x scale.B | ||
432 | " mul %B[i], %B[scale] \n\t" | ||
433 | //" mov %C[result], r0 \n\t" | ||
434 | //" mov %D[result], r1 \n\t" | ||
435 | " movw %C[result], r0 \n\t" | ||
436 | : [result] "+r" (result) | ||
437 | : [i] "r" (i), | ||
438 | [scale] "r" (scale) | ||
439 | : "r0", "r1" | ||
440 | ); | ||
441 | |||
442 | const uint8_t zero = 0; | ||
443 | asm volatile( | ||
444 | // result.B-D += i.B x scale.A | ||
445 | " mul %B[i], %A[scale] \n\t" | ||
446 | |||
447 | " add %B[result], r0 \n\t" | ||
448 | " adc %C[result], r1 \n\t" | ||
449 | " adc %D[result], %[zero] \n\t" | ||
450 | |||
451 | // result.B-D += i.A x scale.B | ||
452 | " mul %A[i], %B[scale] \n\t" | ||
453 | |||
454 | " add %B[result], r0 \n\t" | ||
455 | " adc %C[result], r1 \n\t" | ||
456 | " adc %D[result], %[zero] \n\t" | ||
457 | |||
458 | // cleanup r1 | ||
459 | " clr r1 \n\t" | ||
460 | |||
461 | : [result] "+r" (result) | ||
462 | : [i] "r" (i), | ||
463 | [scale] "r" (scale), | ||
464 | [zero] "r" (zero) | ||
465 | : "r0", "r1" | ||
466 | ); | ||
467 | |||
468 | result = result >> 16; | ||
469 | return result; | ||
470 | #endif | ||
471 | #else | ||
472 | #error "No implementation for scale16 available." | ||
473 | #endif | ||
474 | } | ||
475 | ///@} | ||
476 | |||
477 | ///@defgroup Dimming Dimming and brightening functions | ||
478 | /// | ||
479 | /// Dimming and brightening functions | ||
480 | /// | ||
481 | /// The eye does not respond in a linear way to light. | ||
482 | /// High speed PWM'd LEDs at 50% duty cycle appear far | ||
483 | /// brighter than the 'half as bright' you might expect. | ||
484 | /// | ||
485 | /// If you want your midpoint brightness level (128) to | ||
486 | /// appear half as bright as 'full' brightness (255), you | ||
487 | /// have to apply a 'dimming function'. | ||
488 | ///@{ | ||
489 | |||
490 | /// Adjust a scaling value for dimming | ||
491 | LIB8STATIC uint8_t dim8_raw( uint8_t x) | ||
492 | { | ||
493 | return scale8( x, x); | ||
494 | } | ||
495 | |||
496 | /// Adjust a scaling value for dimming for video (value will never go below 1) | ||
497 | LIB8STATIC uint8_t dim8_video( uint8_t x) | ||
498 | { | ||
499 | return scale8_video( x, x); | ||
500 | } | ||
501 | |||
502 | /// Linear version of the dimming function that halves for values < 128 | ||
503 | LIB8STATIC uint8_t dim8_lin( uint8_t x ) | ||
504 | { | ||
505 | if( x & 0x80 ) { | ||
506 | x = scale8( x, x); | ||
507 | } else { | ||
508 | x += 1; | ||
509 | x /= 2; | ||
510 | } | ||
511 | return x; | ||
512 | } | ||
513 | |||
514 | /// inverse of the dimming function, brighten a value | ||
515 | LIB8STATIC uint8_t brighten8_raw( uint8_t x) | ||
516 | { | ||
517 | uint8_t ix = 255 - x; | ||
518 | return 255 - scale8( ix, ix); | ||
519 | } | ||
520 | |||
521 | /// inverse of the dimming function, brighten a value | ||
522 | LIB8STATIC uint8_t brighten8_video( uint8_t x) | ||
523 | { | ||
524 | uint8_t ix = 255 - x; | ||
525 | return 255 - scale8_video( ix, ix); | ||
526 | } | ||
527 | |||
528 | /// inverse of the dimming function, brighten a value | ||
529 | LIB8STATIC uint8_t brighten8_lin( uint8_t x ) | ||
530 | { | ||
531 | uint8_t ix = 255 - x; | ||
532 | if( ix & 0x80 ) { | ||
533 | ix = scale8( ix, ix); | ||
534 | } else { | ||
535 | ix += 1; | ||
536 | ix /= 2; | ||
537 | } | ||
538 | return 255 - ix; | ||
539 | } | ||
540 | |||
541 | ///@} | ||
542 | #endif | ||
diff --git a/lib/lib8tion/trig8.h b/lib/lib8tion/trig8.h new file mode 100644 index 000000000..4907c6ff3 --- /dev/null +++ b/lib/lib8tion/trig8.h | |||
@@ -0,0 +1,259 @@ | |||
1 | #ifndef __INC_LIB8TION_TRIG_H | ||
2 | #define __INC_LIB8TION_TRIG_H | ||
3 | |||
4 | ///@ingroup lib8tion | ||
5 | |||
6 | ///@defgroup Trig Fast trig functions | ||
7 | /// Fast 8 and 16-bit approximations of sin(x) and cos(x). | ||
8 | /// Don't use these approximations for calculating the | ||
9 | /// trajectory of a rocket to Mars, but they're great | ||
10 | /// for art projects and LED displays. | ||
11 | /// | ||
12 | /// On Arduino/AVR, the 16-bit approximation is more than | ||
13 | /// 10X faster than floating point sin(x) and cos(x), while | ||
14 | /// the 8-bit approximation is more than 20X faster. | ||
15 | ///@{ | ||
16 | |||
// sin16 resolves to the AVR-tuned implementation on AVR targets and to
// the portable C implementation everywhere else.
#ifdef __AVR__
#define sin16 sin16_avr
#else
#define sin16 sin16_C
#endif
22 | |||
23 | /// Fast 16-bit approximation of sin(x). This approximation never varies more than | ||
24 | /// 0.69% from the floating point value you'd get by doing | ||
25 | /// | ||
26 | /// float s = sin(x) * 32767.0; | ||
27 | /// | ||
28 | /// @param theta input angle from 0-65535 | ||
29 | /// @returns sin of theta, value between -32767 to 32767. | ||
30 | LIB8STATIC int16_t sin16_avr( uint16_t theta ) | ||
31 | { | ||
32 | static const uint8_t data[] = | ||
33 | { 0, 0, 49, 0, 6393%256, 6393/256, 48, 0, | ||
34 | 12539%256, 12539/256, 44, 0, 18204%256, 18204/256, 38, 0, | ||
35 | 23170%256, 23170/256, 31, 0, 27245%256, 27245/256, 23, 0, | ||
36 | 30273%256, 30273/256, 14, 0, 32137%256, 32137/256, 4 /*,0*/ }; | ||
37 | |||
38 | uint16_t offset = (theta & 0x3FFF); | ||
39 | |||
40 | // AVR doesn't have a multi-bit shift instruction, | ||
41 | // so if we say "offset >>= 3", gcc makes a tiny loop. | ||
42 | // Inserting empty volatile statements between each | ||
43 | // bit shift forces gcc to unroll the loop. | ||
44 | offset >>= 1; // 0..8191 | ||
45 | asm volatile(""); | ||
46 | offset >>= 1; // 0..4095 | ||
47 | asm volatile(""); | ||
48 | offset >>= 1; // 0..2047 | ||
49 | |||
50 | if( theta & 0x4000 ) offset = 2047 - offset; | ||
51 | |||
52 | uint8_t sectionX4; | ||
53 | sectionX4 = offset / 256; | ||
54 | sectionX4 *= 4; | ||
55 | |||
56 | uint8_t m; | ||
57 | |||
58 | union { | ||
59 | uint16_t b; | ||
60 | struct { | ||
61 | uint8_t blo; | ||
62 | uint8_t bhi; | ||
63 | }; | ||
64 | } u; | ||
65 | |||
66 | //in effect u.b = blo + (256 * bhi); | ||
67 | u.blo = data[ sectionX4 ]; | ||
68 | u.bhi = data[ sectionX4 + 1]; | ||
69 | m = data[ sectionX4 + 2]; | ||
70 | |||
71 | uint8_t secoffset8 = (uint8_t)(offset) / 2; | ||
72 | |||
73 | uint16_t mx = m * secoffset8; | ||
74 | |||
75 | int16_t y = mx + u.b; | ||
76 | if( theta & 0x8000 ) y = -y; | ||
77 | |||
78 | return y; | ||
79 | } | ||
80 | |||
81 | /// Fast 16-bit approximation of sin(x). This approximation never varies more than | ||
82 | /// 0.69% from the floating point value you'd get by doing | ||
83 | /// | ||
84 | /// float s = sin(x) * 32767.0; | ||
85 | /// | ||
86 | /// @param theta input angle from 0-65535 | ||
87 | /// @returns sin of theta, value between -32767 to 32767. | ||
88 | LIB8STATIC int16_t sin16_C( uint16_t theta ) | ||
89 | { | ||
90 | static const uint16_t base[] = | ||
91 | { 0, 6393, 12539, 18204, 23170, 27245, 30273, 32137 }; | ||
92 | static const uint8_t slope[] = | ||
93 | { 49, 48, 44, 38, 31, 23, 14, 4 }; | ||
94 | |||
95 | uint16_t offset = (theta & 0x3FFF) >> 3; // 0..2047 | ||
96 | if( theta & 0x4000 ) offset = 2047 - offset; | ||
97 | |||
98 | uint8_t section = offset / 256; // 0..7 | ||
99 | uint16_t b = base[section]; | ||
100 | uint8_t m = slope[section]; | ||
101 | |||
102 | uint8_t secoffset8 = (uint8_t)(offset) / 2; | ||
103 | |||
104 | uint16_t mx = m * secoffset8; | ||
105 | int16_t y = mx + b; | ||
106 | |||
107 | if( theta & 0x8000 ) y = -y; | ||
108 | |||
109 | return y; | ||
110 | } | ||
111 | |||
112 | |||
113 | /// Fast 16-bit approximation of cos(x). This approximation never varies more than | ||
114 | /// 0.69% from the floating point value you'd get by doing | ||
115 | /// | ||
116 | /// float s = cos(x) * 32767.0; | ||
117 | /// | ||
118 | /// @param theta input angle from 0-65535 | ||
119 | /// @returns sin of theta, value between -32767 to 32767. | ||
120 | LIB8STATIC int16_t cos16( uint16_t theta) | ||
121 | { | ||
122 | return sin16( theta + 16384); | ||
123 | } | ||
124 | |||
125 | /////////////////////////////////////////////////////////////////////// | ||
126 | |||
127 | // sin8 & cos8 | ||
128 | // Fast 8-bit approximations of sin(x) & cos(x). | ||
129 | // Input angle is an unsigned int from 0-255. | ||
130 | // Output is an unsigned int from 0 to 255. | ||
131 | // | ||
132 | // This approximation can vary up to 2% | ||
133 | // from the floating point value you'd get by doing | ||
134 | // float s = (sin( x ) * 128.0) + 128; | ||
135 | // | ||
136 | // Don't use this approximation for calculating the | ||
137 | // "real" trigonometric calculations, but it's great | ||
138 | // for art projects and LED displays. | ||
139 | // | ||
140 | // On Arduino/AVR, this approximation is more than | ||
141 | // 20X faster than floating point sin(x) and cos(x) | ||
142 | |||
// sin8 resolves to the portable C implementation on non-AVR targets and
// on ATtiny builds, and to the AVR assembly implementation otherwise.
#if !defined(__AVR__) || defined(LIB8_ATTINY)
#define sin8 sin8_C
#else
#define sin8 sin8_avr
#endif
148 | |||
149 | |||
/// Lookup table for sin8: four (base, slope*16) pairs, one per section
/// of the quarter wave, stored interleaved. It is static so that every
/// translation unit including this header gets a private copy instead
/// of clashing external definitions at link time.
static const uint8_t b_m16_interleave[] = { 0, 49, 49, 41, 90, 27, 117, 10 };
151 | |||
152 | /// Fast 8-bit approximation of sin(x). This approximation never varies more than | ||
153 | /// 2% from the floating point value you'd get by doing | ||
154 | /// | ||
155 | /// float s = (sin(x) * 128.0) + 128; | ||
156 | /// | ||
157 | /// @param theta input angle from 0-255 | ||
158 | /// @returns sin of theta, value between 0 and 255 | ||
159 | LIB8STATIC uint8_t sin8_avr( uint8_t theta) | ||
160 | { | ||
161 | uint8_t offset = theta; | ||
162 | |||
163 | asm volatile( | ||
164 | "sbrc %[theta],6 \n\t" | ||
165 | "com %[offset] \n\t" | ||
166 | : [theta] "+r" (theta), [offset] "+r" (offset) | ||
167 | ); | ||
168 | |||
169 | offset &= 0x3F; // 0..63 | ||
170 | |||
171 | uint8_t secoffset = offset & 0x0F; // 0..15 | ||
172 | if( theta & 0x40) secoffset++; | ||
173 | |||
174 | uint8_t m16; uint8_t b; | ||
175 | |||
176 | uint8_t section = offset >> 4; // 0..3 | ||
177 | uint8_t s2 = section * 2; | ||
178 | |||
179 | const uint8_t* p = b_m16_interleave; | ||
180 | p += s2; | ||
181 | b = *p; | ||
182 | p++; | ||
183 | m16 = *p; | ||
184 | |||
185 | uint8_t mx; | ||
186 | uint8_t xr1; | ||
187 | asm volatile( | ||
188 | "mul %[m16],%[secoffset] \n\t" | ||
189 | "mov %[mx],r0 \n\t" | ||
190 | "mov %[xr1],r1 \n\t" | ||
191 | "eor r1, r1 \n\t" | ||
192 | "swap %[mx] \n\t" | ||
193 | "andi %[mx],0x0F \n\t" | ||
194 | "swap %[xr1] \n\t" | ||
195 | "andi %[xr1], 0xF0 \n\t" | ||
196 | "or %[mx], %[xr1] \n\t" | ||
197 | : [mx] "=d" (mx), [xr1] "=d" (xr1) | ||
198 | : [m16] "d" (m16), [secoffset] "d" (secoffset) | ||
199 | ); | ||
200 | |||
201 | int8_t y = mx + b; | ||
202 | if( theta & 0x80 ) y = -y; | ||
203 | |||
204 | y += 128; | ||
205 | |||
206 | return y; | ||
207 | } | ||
208 | |||
209 | |||
210 | /// Fast 8-bit approximation of sin(x). This approximation never varies more than | ||
211 | /// 2% from the floating point value you'd get by doing | ||
212 | /// | ||
213 | /// float s = (sin(x) * 128.0) + 128; | ||
214 | /// | ||
215 | /// @param theta input angle from 0-255 | ||
216 | /// @returns sin of theta, value between 0 and 255 | ||
217 | LIB8STATIC uint8_t sin8_C( uint8_t theta) | ||
218 | { | ||
219 | uint8_t offset = theta; | ||
220 | if( theta & 0x40 ) { | ||
221 | offset = (uint8_t)255 - offset; | ||
222 | } | ||
223 | offset &= 0x3F; // 0..63 | ||
224 | |||
225 | uint8_t secoffset = offset & 0x0F; // 0..15 | ||
226 | if( theta & 0x40) secoffset++; | ||
227 | |||
228 | uint8_t section = offset >> 4; // 0..3 | ||
229 | uint8_t s2 = section * 2; | ||
230 | const uint8_t* p = b_m16_interleave; | ||
231 | p += s2; | ||
232 | uint8_t b = *p; | ||
233 | p++; | ||
234 | uint8_t m16 = *p; | ||
235 | |||
236 | uint8_t mx = (m16 * secoffset) >> 4; | ||
237 | |||
238 | int8_t y = mx + b; | ||
239 | if( theta & 0x80 ) y = -y; | ||
240 | |||
241 | y += 128; | ||
242 | |||
243 | return y; | ||
244 | } | ||
245 | |||
246 | /// Fast 8-bit approximation of cos(x). This approximation never varies more than | ||
247 | /// 2% from the floating point value you'd get by doing | ||
248 | /// | ||
249 | /// float s = (cos(x) * 128.0) + 128; | ||
250 | /// | ||
251 | /// @param theta input angle from 0-255 | ||
252 | /// @returns sin of theta, value between 0 and 255 | ||
253 | LIB8STATIC uint8_t cos8( uint8_t theta) | ||
254 | { | ||
255 | return sin8( theta + 64); | ||
256 | } | ||
257 | |||
258 | ///@} | ||
259 | #endif | ||