diff options
| author | XScorpion2 <rcalt2vt@gmail.com> | 2019-04-02 19:24:14 -0500 |
|---|---|---|
| committer | Drashna Jaelre <drashna@live.com> | 2019-04-02 17:24:14 -0700 |
| commit | c98247e3dd2958bd2d8969dc75170e7e2757b895 (patch) | |
| tree | a566de223a9501809e1059c522b52adf7d37fe74 /lib/lib8tion | |
| parent | 68d8bb2b3fb8a35fda164539d27754b3f74e0819 (diff) | |
| download | qmk_firmware-c98247e3dd2958bd2d8969dc75170e7e2757b895.tar.gz qmk_firmware-c98247e3dd2958bd2d8969dc75170e7e2757b895.zip | |
RGB Matrix Overhaul (#5372)
* RGB Matrix overhaul
Breakout of animations to separate files
Integration of optimized int based math lib
Overhaul of rgb_matrix.c and animations for performance
* Updating effect function api for future extensions
* Combined the keypresses || keyreleases define checks into a single define so I stop forgetting it where necessary
* Moving define RGB_MATRIX_KEYREACTIVE_ENABLED earlier in the include chain
Diffstat (limited to 'lib/lib8tion')
| -rw-r--r-- | lib/lib8tion/LICENSE | 20 | ||||
| -rw-r--r-- | lib/lib8tion/lib8tion.c | 242 | ||||
| -rw-r--r-- | lib/lib8tion/lib8tion.h | 934 | ||||
| -rw-r--r-- | lib/lib8tion/math8.h | 552 | ||||
| -rw-r--r-- | lib/lib8tion/random8.h | 94 | ||||
| -rw-r--r-- | lib/lib8tion/scale8.h | 542 | ||||
| -rw-r--r-- | lib/lib8tion/trig8.h | 259 |
7 files changed, 2643 insertions, 0 deletions
diff --git a/lib/lib8tion/LICENSE b/lib/lib8tion/LICENSE new file mode 100644 index 000000000..ebe476330 --- /dev/null +++ b/lib/lib8tion/LICENSE | |||
| @@ -0,0 +1,20 @@ | |||
| 1 | The MIT License (MIT) | ||
| 2 | |||
| 3 | Copyright (c) 2013 FastLED | ||
| 4 | |||
| 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of | ||
| 6 | this software and associated documentation files (the "Software"), to deal in | ||
| 7 | the Software without restriction, including without limitation the rights to | ||
| 8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of | ||
| 9 | the Software, and to permit persons to whom the Software is furnished to do so, | ||
| 10 | subject to the following conditions: | ||
| 11 | |||
| 12 | The above copyright notice and this permission notice shall be included in all | ||
| 13 | copies or substantial portions of the Software. | ||
| 14 | |||
| 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
| 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS | ||
| 17 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR | ||
| 18 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER | ||
| 19 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
| 20 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | ||
diff --git a/lib/lib8tion/lib8tion.c b/lib/lib8tion/lib8tion.c new file mode 100644 index 000000000..84b3e9c61 --- /dev/null +++ b/lib/lib8tion/lib8tion.c | |||
| @@ -0,0 +1,242 @@ | |||
| 1 | #define FASTLED_INTERNAL | ||
| 2 | #include <stdint.h> | ||
| 3 | |||
| 4 | #define RAND16_SEED 1337 | ||
| 5 | uint16_t rand16seed = RAND16_SEED; | ||
| 6 | |||
| 7 | |||
| 8 | // memset8, memcpy8, memmove8: | ||
| 9 | // optimized avr replacements for the standard "C" library | ||
| 10 | // routines memset, memcpy, and memmove. | ||
| 11 | // | ||
| 12 | // There are two techniques that make these routines | ||
| 13 | // faster than the standard avr-libc routines. | ||
| 14 | // First, the loops are unrolled 2X, meaning that | ||
| 15 | // the average loop overhead is cut in half. | ||
| 16 | // And second, the compare-and-branch at the bottom | ||
| 17 | // of each loop decrements the low byte of the | ||
| 18 | // counter, and if the carry is clear, it branches | ||
| 19 | // back up immediately. Only if the low byte math | ||
| 20 | // causes carry do we bother to decrement the high | ||
| 21 | // byte and check that result for carry as well. | ||
| 22 | // Results for a 100-byte buffer are 20-40% faster | ||
| 23 | // than standard avr-libc, at a cost of a few extra | ||
| 24 | // bytes of code. | ||
| 25 | |||
| 26 | #if defined(__AVR__) | ||
| 27 | //__attribute__ ((noinline)) | ||
| 28 | void * memset8 ( void * ptr, uint8_t val, uint16_t num ) | ||
| 29 | { | ||
| 30 | asm volatile( | ||
| 31 | " movw r26, %[ptr] \n\t" | ||
| 32 | " sbrs %A[num], 0 \n\t" | ||
| 33 | " rjmp Lseteven_%= \n\t" | ||
| 34 | " rjmp Lsetodd_%= \n\t" | ||
| 35 | "Lsetloop_%=: \n\t" | ||
| 36 | " st X+, %[val] \n\t" | ||
| 37 | "Lsetodd_%=: \n\t" | ||
| 38 | " st X+, %[val] \n\t" | ||
| 39 | "Lseteven_%=: \n\t" | ||
| 40 | " subi %A[num], 2 \n\t" | ||
| 41 | " brcc Lsetloop_%= \n\t" | ||
| 42 | " sbci %B[num], 0 \n\t" | ||
| 43 | " brcc Lsetloop_%= \n\t" | ||
| 44 | : [num] "+r" (num) | ||
| 45 | : [ptr] "r" (ptr), | ||
| 46 | [val] "r" (val) | ||
| 47 | : "memory" | ||
| 48 | ); | ||
| 49 | return ptr; | ||
| 50 | } | ||
| 51 | |||
| 52 | |||
| 53 | |||
| 54 | //__attribute__ ((noinline)) | ||
| 55 | void * memcpy8 ( void * dst, const void* src, uint16_t num ) | ||
| 56 | { | ||
| 57 | asm volatile( | ||
| 58 | " movw r30, %[src] \n\t" | ||
| 59 | " movw r26, %[dst] \n\t" | ||
| 60 | " sbrs %A[num], 0 \n\t" | ||
| 61 | " rjmp Lcpyeven_%= \n\t" | ||
| 62 | " rjmp Lcpyodd_%= \n\t" | ||
| 63 | "Lcpyloop_%=: \n\t" | ||
| 64 | " ld __tmp_reg__, Z+ \n\t" | ||
| 65 | " st X+, __tmp_reg__ \n\t" | ||
| 66 | "Lcpyodd_%=: \n\t" | ||
| 67 | " ld __tmp_reg__, Z+ \n\t" | ||
| 68 | " st X+, __tmp_reg__ \n\t" | ||
| 69 | "Lcpyeven_%=: \n\t" | ||
| 70 | " subi %A[num], 2 \n\t" | ||
| 71 | " brcc Lcpyloop_%= \n\t" | ||
| 72 | " sbci %B[num], 0 \n\t" | ||
| 73 | " brcc Lcpyloop_%= \n\t" | ||
| 74 | : [num] "+r" (num) | ||
| 75 | : [src] "r" (src), | ||
| 76 | [dst] "r" (dst) | ||
| 77 | : "memory" | ||
| 78 | ); | ||
| 79 | return dst; | ||
| 80 | } | ||
| 81 | |||
| 82 | //__attribute__ ((noinline)) | ||
| 83 | void * memmove8 ( void * dst, const void* src, uint16_t num ) | ||
| 84 | { | ||
| 85 | if( src > dst) { | ||
| 86 | // if src > dst then we can use the forward-stepping memcpy8 | ||
| 87 | return memcpy8( dst, src, num); | ||
| 88 | } else { | ||
| 89 | // if src <= dst then we have to step backward: | ||
| 90 | dst = (char*)dst + num; | ||
| 91 | src = (char*)src + num; | ||
| 92 | asm volatile( | ||
| 93 | " movw r30, %[src] \n\t" | ||
| 94 | " movw r26, %[dst] \n\t" | ||
| 95 | " sbrs %A[num], 0 \n\t" | ||
| 96 | " rjmp Lmoveven_%= \n\t" | ||
| 97 | " rjmp Lmovodd_%= \n\t" | ||
| 98 | "Lmovloop_%=: \n\t" | ||
| 99 | " ld __tmp_reg__, -Z \n\t" | ||
| 100 | " st -X, __tmp_reg__ \n\t" | ||
| 101 | "Lmovodd_%=: \n\t" | ||
| 102 | " ld __tmp_reg__, -Z \n\t" | ||
| 103 | " st -X, __tmp_reg__ \n\t" | ||
| 104 | "Lmoveven_%=: \n\t" | ||
| 105 | " subi %A[num], 2 \n\t" | ||
| 106 | " brcc Lmovloop_%= \n\t" | ||
| 107 | " sbci %B[num], 0 \n\t" | ||
| 108 | " brcc Lmovloop_%= \n\t" | ||
| 109 | : [num] "+r" (num) | ||
| 110 | : [src] "r" (src), | ||
| 111 | [dst] "r" (dst) | ||
| 112 | : "memory" | ||
| 113 | ); | ||
| 114 | return dst; | ||
| 115 | } | ||
| 116 | } | ||
| 117 | |||
| 118 | #endif /* AVR */ | ||
| 119 | |||
| 120 | |||
| 121 | |||
| 122 | |||
| 123 | #if 0 | ||
| 124 | // TEST / VERIFICATION CODE ONLY BELOW THIS POINT | ||
| 125 | #include <Arduino.h> | ||
| 126 | #include "lib8tion.h" | ||
| 127 | |||
| 128 | void test1abs( int8_t i) | ||
| 129 | { | ||
| 130 | Serial.print("abs("); Serial.print(i); Serial.print(") = "); | ||
| 131 | int8_t j = abs8(i); | ||
| 132 | Serial.print(j); Serial.println(" "); | ||
| 133 | } | ||
| 134 | |||
| 135 | void testabs() | ||
| 136 | { | ||
| 137 | delay(5000); | ||
| 138 | for( int8_t q = -128; q != 127; q++) { | ||
| 139 | test1abs(q); | ||
| 140 | } | ||
| 141 | for(;;){}; | ||
| 142 | } | ||
| 143 | |||
| 144 | |||
| 145 | void testmul8() | ||
| 146 | { | ||
| 147 | delay(5000); | ||
| 148 | byte r, c; | ||
| 149 | |||
| 150 | Serial.println("mul8:"); | ||
| 151 | for( r = 0; r <= 20; r += 1) { | ||
| 152 | Serial.print(r); Serial.print(" : "); | ||
| 153 | for( c = 0; c <= 20; c += 1) { | ||
| 154 | byte t; | ||
| 155 | t = mul8( r, c); | ||
| 156 | Serial.print(t); Serial.print(' '); | ||
| 157 | } | ||
| 158 | Serial.println(' '); | ||
| 159 | } | ||
| 160 | Serial.println("done."); | ||
| 161 | for(;;){}; | ||
| 162 | } | ||
| 163 | |||
| 164 | |||
| 165 | void testscale8() | ||
| 166 | { | ||
| 167 | delay(5000); | ||
| 168 | byte r, c; | ||
| 169 | |||
| 170 | Serial.println("scale8:"); | ||
| 171 | for( r = 0; r <= 240; r += 10) { | ||
| 172 | Serial.print(r); Serial.print(" : "); | ||
| 173 | for( c = 0; c <= 240; c += 10) { | ||
| 174 | byte t; | ||
| 175 | t = scale8( r, c); | ||
| 176 | Serial.print(t); Serial.print(' '); | ||
| 177 | } | ||
| 178 | Serial.println(' '); | ||
| 179 | } | ||
| 180 | |||
| 181 | Serial.println(' '); | ||
| 182 | Serial.println("scale8_video:"); | ||
| 183 | |||
| 184 | for( r = 0; r <= 100; r += 4) { | ||
| 185 | Serial.print(r); Serial.print(" : "); | ||
| 186 | for( c = 0; c <= 100; c += 4) { | ||
| 187 | byte t; | ||
| 188 | t = scale8_video( r, c); | ||
| 189 | Serial.print(t); Serial.print(' '); | ||
| 190 | } | ||
| 191 | Serial.println(' '); | ||
| 192 | } | ||
| 193 | |||
| 194 | Serial.println("done."); | ||
| 195 | for(;;){}; | ||
| 196 | } | ||
| 197 | |||
| 198 | |||
| 199 | |||
| 200 | void testqadd8() | ||
| 201 | { | ||
| 202 | delay(5000); | ||
| 203 | byte r, c; | ||
| 204 | for( r = 0; r <= 240; r += 10) { | ||
| 205 | Serial.print(r); Serial.print(" : "); | ||
| 206 | for( c = 0; c <= 240; c += 10) { | ||
| 207 | byte t; | ||
| 208 | t = qadd8( r, c); | ||
| 209 | Serial.print(t); Serial.print(' '); | ||
| 210 | } | ||
| 211 | Serial.println(' '); | ||
| 212 | } | ||
| 213 | Serial.println("done."); | ||
| 214 | for(;;){}; | ||
| 215 | } | ||
| 216 | |||
| 217 | void testnscale8x3() | ||
| 218 | { | ||
| 219 | delay(5000); | ||
| 220 | byte r, g, b, sc; | ||
| 221 | for( byte z = 0; z < 10; z++) { | ||
| 222 | r = random8(); g = random8(); b = random8(); sc = random8(); | ||
| 223 | |||
| 224 | Serial.print("nscale8x3_video( "); | ||
| 225 | Serial.print(r); Serial.print(", "); | ||
| 226 | Serial.print(g); Serial.print(", "); | ||
| 227 | Serial.print(b); Serial.print(", "); | ||
| 228 | Serial.print(sc); Serial.print(") = [ "); | ||
| 229 | |||
| 230 | nscale8x3_video( r, g, b, sc); | ||
| 231 | |||
| 232 | Serial.print(r); Serial.print(", "); | ||
| 233 | Serial.print(g); Serial.print(", "); | ||
| 234 | Serial.print(b); Serial.print("]"); | ||
| 235 | |||
| 236 | Serial.println(' '); | ||
| 237 | } | ||
| 238 | Serial.println("done."); | ||
| 239 | for(;;){}; | ||
| 240 | } | ||
| 241 | |||
| 242 | #endif | ||
diff --git a/lib/lib8tion/lib8tion.h b/lib/lib8tion/lib8tion.h new file mode 100644 index 000000000..d93c748e6 --- /dev/null +++ b/lib/lib8tion/lib8tion.h | |||
| @@ -0,0 +1,934 @@ | |||
| 1 | #ifndef __INC_LIB8TION_H | ||
| 2 | #define __INC_LIB8TION_H | ||
| 3 | |||
| 4 | /* | ||
| 5 | |||
| 6 | Fast, efficient 8-bit math functions specifically | ||
| 7 | designed for high-performance LED programming. | ||
| 8 | |||
| 9 | Because of the AVR(Arduino) and ARM assembly language | ||
| 10 | implementations provided, using these functions often | ||
| 11 | results in smaller and faster code than the equivalent | ||
| 12 | program using plain "C" arithmetic and logic. | ||
| 13 | |||
| 14 | |||
| 15 | Included are: | ||
| 16 | |||
| 17 | |||
| 18 | - Saturating unsigned 8-bit add and subtract. | ||
| 19 | Instead of wrapping around if an overflow occurs, | ||
| 20 | these routines just 'clamp' the output at a maximum | ||
| 21 | of 255, or a minimum of 0. Useful for adding pixel | ||
| 22 | values. E.g., qadd8( 200, 100) = 255. | ||
| 23 | |||
| 24 | qadd8( i, j) == MIN( (i + j), 0xFF ) | ||
| 25 | qsub8( i, j) == MAX( (i - j), 0 ) | ||
| 26 | |||
| 27 | - Saturating signed 8-bit ("7-bit") add. | ||
| 28 | qadd7( i, j) == MIN( (i + j), 0x7F) | ||
| 29 | |||
| 30 | |||
| 31 | - Scaling (down) of unsigned 8- and 16- bit values. | ||
| 32 | Scaledown value is specified in 1/256ths. | ||
| 33 | scale8( i, sc) == (i * sc) / 256 | ||
| 34 | scale16by8( i, sc) == (i * sc) / 256 | ||
| 35 | |||
| 36 | Example: scaling a 0-255 value down into a | ||
| 37 | range from 0-99: | ||
| 38 | downscaled = scale8( originalnumber, 100); | ||
| 39 | |||
| 40 | A special version of scale8 is provided for scaling | ||
| 41 | LED brightness values, to make sure that they don't | ||
| 42 | accidentally scale down to total black at low | ||
| 43 | dimming levels, since that would look wrong: | ||
| 44 | scale8_video( i, sc) = ((i * sc) / 256) +? 1 | ||
| 45 | |||
| 46 | Example: reducing an LED brightness by a | ||
| 47 | dimming factor: | ||
| 48 | new_bright = scale8_video( orig_bright, dimming); | ||
| 49 | |||
| 50 | |||
| 51 | - Fast 8- and 16- bit unsigned random numbers. | ||
| 52 | Significantly faster than Arduino random(), but | ||
| 53 | also somewhat less random. You can add entropy. | ||
| 54 | random8() == random from 0..255 | ||
| 55 | random8( n) == random from 0..(n-1) | ||
| 56 | random8( n, m) == random from n..(m-1) | ||
| 57 | |||
| 58 | random16() == random from 0..65535 | ||
| 59 | random16( n) == random from 0..(n-1) | ||
| 60 | random16( n, m) == random from n..(m-1) | ||
| 61 | |||
| 62 | random16_set_seed( k) == seed = k | ||
| 63 | random16_add_entropy( k) == seed += k | ||
| 64 | |||
| 65 | |||
| 66 | - Absolute value of a signed 8-bit value. | ||
| 67 | abs8( i) == abs( i) | ||
| 68 | |||
| 69 | |||
| 70 | - 8-bit math operations which return 8-bit values. | ||
| 71 | These are provided mostly for completeness, | ||
| 72 | not particularly for performance. | ||
| 73 | mul8( i, j) == (i * j) & 0xFF | ||
| 74 | add8( i, j) == (i + j) & 0xFF | ||
| 75 | sub8( i, j) == (i - j) & 0xFF | ||
| 76 | |||
| 77 | |||
| 78 | - Fast 16-bit approximations of sin and cos. | ||
| 79 | Input angle is a uint16_t from 0-65535. | ||
| 80 | Output is a signed int16_t from -32767 to 32767. | ||
| 81 | sin16( x) == sin( (x/32768.0) * pi) * 32767 | ||
| 82 | cos16( x) == cos( (x/32768.0) * pi) * 32767 | ||
| 83 | Accurate to more than 99% in all cases. | ||
| 84 | |||
| 85 | - Fast 8-bit approximations of sin and cos. | ||
| 86 | Input angle is a uint8_t from 0-255. | ||
| 87 | Output is an UNsigned uint8_t from 0 to 255. | ||
| 88 | sin8( x) == (sin( (x/128.0) * pi) * 128) + 128 | ||
| 89 | cos8( x) == (cos( (x/128.0) * pi) * 128) + 128 | ||
| 90 | Accurate to within about 2%. | ||
| 91 | |||
| 92 | |||
| 93 | - Fast 8-bit "easing in/out" function. | ||
| 94 | ease8InOutCubic(x) == 3(x^2) - 2(x^3) | ||
| 95 | ease8InOutApprox(x) == | ||
| 96 | faster, rougher, approximation of cubic easing | ||
| 97 | ease8InOutQuad(x) == quadratic (vs cubic) easing | ||
| 98 | |||
| 99 | - Cubic, Quadratic, and Triangle wave functions. | ||
| 100 | Input is a uint8_t representing phase within the wave, | ||
| 101 | similar to how sin8 takes an angle 'theta'. | ||
| 102 | Output is a uint8_t representing the amplitude of | ||
| 103 | the wave at that point. | ||
| 104 | cubicwave8( x) | ||
| 105 | quadwave8( x) | ||
| 106 | triwave8( x) | ||
| 107 | |||
| 108 | - Square root for 16-bit integers. About three times | ||
| 109 | faster and five times smaller than Arduino's built-in | ||
| 110 | generic 32-bit sqrt routine. | ||
| 111 | sqrt16( uint16_t x ) == sqrt( x) | ||
| 112 | |||
| 113 | - Dimming and brightening functions for 8-bit | ||
| 114 | light values. | ||
| 115 | dim8_video( x) == scale8_video( x, x) | ||
| 116 | dim8_raw( x) == scale8( x, x) | ||
| 117 | dim8_lin( x) == (x<128) ? ((x+1)/2) : scale8(x,x) | ||
| 118 | brighten8_video( x) == 255 - dim8_video( 255 - x) | ||
| 119 | brighten8_raw( x) == 255 - dim8_raw( 255 - x) | ||
| 120 | brighten8_lin( x) == 255 - dim8_lin( 255 - x) | ||
| 121 | The dimming functions in particular are suitable | ||
| 122 | for making LED light output appear more 'linear'. | ||
| 123 | |||
| 124 | |||
| 125 | - Linear interpolation between two values, with the | ||
| 126 | fraction between them expressed as an 8- or 16-bit | ||
| 127 | fixed point fraction (fract8 or fract16). | ||
| 128 | lerp8by8( fromU8, toU8, fract8 ) | ||
| 129 | lerp16by8( fromU16, toU16, fract8 ) | ||
| 130 | lerp15by8( fromS16, toS16, fract8 ) | ||
| 131 | == from + ((( to - from ) * fract8) / 256) | ||
| 132 | lerp16by16( fromU16, toU16, fract16 ) | ||
| 133 | == from + ((( to - from ) * fract16) / 65536) | ||
| 134 | map8( in, rangeStart, rangeEnd) | ||
| 135 | == map( in, 0, 255, rangeStart, rangeEnd); | ||
| 136 | |||
| 137 | - Optimized memmove, memcpy, and memset, that are | ||
| 138 | faster than standard avr-libc 1.8. | ||
| 139 | memmove8( dest, src, bytecount) | ||
| 140 | memcpy8( dest, src, bytecount) | ||
| 141 | memset8( buf, value, bytecount) | ||
| 142 | |||
| 143 | - Beat generators which return sine or sawtooth | ||
| 144 | waves in a specified number of Beats Per Minute. | ||
| 145 | Sine wave beat generators can specify a low and | ||
| 146 | high range for the output. Sawtooth wave beat | ||
| 147 | generators always range 0-255 or 0-65535. | ||
| 148 | beatsin8( BPM, low8, high8) | ||
| 149 | = (sine(beatphase) * (high8-low8)) + low8 | ||
| 150 | beatsin16( BPM, low16, high16) | ||
| 151 | = (sine(beatphase) * (high16-low16)) + low16 | ||
| 152 | beatsin88( BPM88, low16, high16) | ||
| 153 | = (sine(beatphase) * (high16-low16)) + low16 | ||
| 154 | beat8( BPM) = 8-bit repeating sawtooth wave | ||
| 155 | beat16( BPM) = 16-bit repeating sawtooth wave | ||
| 156 | beat88( BPM88) = 16-bit repeating sawtooth wave | ||
| 157 | BPM is beats per minute in either simple form | ||
| 158 | e.g. 120, or Q8.8 fixed-point form. | ||
| 159 | BPM88 is beats per minute in ONLY Q8.8 fixed-point | ||
| 160 | form. | ||
| 161 | |||
| 162 | Lib8tion is pronounced like 'libation': lie-BAY-shun | ||
| 163 | |||
| 164 | */ | ||
| 165 | |||
| 166 | |||
| 167 | |||
| 168 | #include <stdint.h> | ||
| 169 | |||
| 170 | #define LIB8STATIC __attribute__ ((unused)) static inline | ||
| 171 | #define LIB8STATIC_ALWAYS_INLINE __attribute__ ((always_inline)) static inline | ||
| 172 | |||
| 173 | #if !defined(__AVR__) | ||
| 174 | #include <string.h> | ||
| 175 | // for memmove, memcpy, and memset if not defined here | ||
| 176 | #endif | ||
| 177 | |||
| 178 | #if defined(__arm__) | ||
| 179 | |||
| 180 | #if defined(FASTLED_TEENSY3) | ||
| 181 | // Can use Cortex M4 DSP instructions | ||
| 182 | #define QADD8_C 0 | ||
| 183 | #define QADD7_C 0 | ||
| 184 | #define QADD8_ARM_DSP_ASM 1 | ||
| 185 | #define QADD7_ARM_DSP_ASM 1 | ||
| 186 | #else | ||
| 187 | // Generic ARM | ||
| 188 | #define QADD8_C 1 | ||
| 189 | #define QADD7_C 1 | ||
| 190 | #endif | ||
| 191 | |||
| 192 | #define QSUB8_C 1 | ||
| 193 | #define SCALE8_C 1 | ||
| 194 | #define SCALE16BY8_C 1 | ||
| 195 | #define SCALE16_C 1 | ||
| 196 | #define ABS8_C 1 | ||
| 197 | #define MUL8_C 1 | ||
| 198 | #define QMUL8_C 1 | ||
| 199 | #define ADD8_C 1 | ||
| 200 | #define SUB8_C 1 | ||
| 201 | #define EASE8_C 1 | ||
| 202 | #define AVG8_C 1 | ||
| 203 | #define AVG7_C 1 | ||
| 204 | #define AVG16_C 1 | ||
| 205 | #define AVG15_C 1 | ||
| 206 | #define BLEND8_C 1 | ||
| 207 | |||
| 208 | |||
| 209 | #elif defined(__AVR__) | ||
| 210 | |||
| 211 | // AVR ATmega and friends Arduino | ||
| 212 | |||
| 213 | #define QADD8_C 0 | ||
| 214 | #define QADD7_C 0 | ||
| 215 | #define QSUB8_C 0 | ||
| 216 | #define ABS8_C 0 | ||
| 217 | #define ADD8_C 0 | ||
| 218 | #define SUB8_C 0 | ||
| 219 | #define AVG8_C 0 | ||
| 220 | #define AVG7_C 0 | ||
| 221 | #define AVG16_C 0 | ||
| 222 | #define AVG15_C 0 | ||
| 223 | |||
| 224 | #define QADD8_AVRASM 1 | ||
| 225 | #define QADD7_AVRASM 1 | ||
| 226 | #define QSUB8_AVRASM 1 | ||
| 227 | #define ABS8_AVRASM 1 | ||
| 228 | #define ADD8_AVRASM 1 | ||
| 229 | #define SUB8_AVRASM 1 | ||
| 230 | #define AVG8_AVRASM 1 | ||
| 231 | #define AVG7_AVRASM 1 | ||
| 232 | #define AVG16_AVRASM 1 | ||
| 233 | #define AVG15_AVRASM 1 | ||
| 234 | |||
| 235 | // Note: these require hardware MUL instruction | ||
| 236 | // -- sorry, ATtiny! | ||
| 237 | #if !defined(LIB8_ATTINY) | ||
| 238 | #define SCALE8_C 0 | ||
| 239 | #define SCALE16BY8_C 0 | ||
| 240 | #define SCALE16_C 0 | ||
| 241 | #define MUL8_C 0 | ||
| 242 | #define QMUL8_C 0 | ||
| 243 | #define EASE8_C 0 | ||
| 244 | #define BLEND8_C 0 | ||
| 245 | #define SCALE8_AVRASM 1 | ||
| 246 | #define SCALE16BY8_AVRASM 1 | ||
| 247 | #define SCALE16_AVRASM 1 | ||
| 248 | #define MUL8_AVRASM 1 | ||
| 249 | #define QMUL8_AVRASM 1 | ||
| 250 | #define EASE8_AVRASM 1 | ||
| 251 | #define CLEANUP_R1_AVRASM 1 | ||
| 252 | #define BLEND8_AVRASM 1 | ||
| 253 | #else | ||
| 254 | // On ATtiny, we just use C implementations | ||
| 255 | #define SCALE8_C 1 | ||
| 256 | #define SCALE16BY8_C 1 | ||
| 257 | #define SCALE16_C 1 | ||
| 258 | #define MUL8_C 1 | ||
| 259 | #define QMUL8_C 1 | ||
| 260 | #define EASE8_C 1 | ||
| 261 | #define BLEND8_C 1 | ||
| 262 | #define SCALE8_AVRASM 0 | ||
| 263 | #define SCALE16BY8_AVRASM 0 | ||
| 264 | #define SCALE16_AVRASM 0 | ||
| 265 | #define MUL8_AVRASM 0 | ||
| 266 | #define QMUL8_AVRASM 0 | ||
| 267 | #define EASE8_AVRASM 0 | ||
| 268 | #define BLEND8_AVRASM 0 | ||
| 269 | #endif | ||
| 270 | |||
| 271 | #else | ||
| 272 | |||
| 273 | // unspecified architecture, so | ||
| 274 | // no ASM, everything in C | ||
| 275 | #define QADD8_C 1 | ||
| 276 | #define QADD7_C 1 | ||
| 277 | #define QSUB8_C 1 | ||
| 278 | #define SCALE8_C 1 | ||
| 279 | #define SCALE16BY8_C 1 | ||
| 280 | #define SCALE16_C 1 | ||
| 281 | #define ABS8_C 1 | ||
| 282 | #define MUL8_C 1 | ||
| 283 | #define QMUL8_C 1 | ||
| 284 | #define ADD8_C 1 | ||
| 285 | #define SUB8_C 1 | ||
| 286 | #define EASE8_C 1 | ||
| 287 | #define AVG8_C 1 | ||
| 288 | #define AVG7_C 1 | ||
| 289 | #define AVG16_C 1 | ||
| 290 | #define AVG15_C 1 | ||
| 291 | #define BLEND8_C 1 | ||
| 292 | |||
| 293 | #endif | ||
| 294 | |||
| 295 | ///@defgroup lib8tion Fast math functions | ||
| 296 | ///A variety of functions for working with numbers. | ||
| 297 | ///@{ | ||
| 298 | |||
| 299 | |||
| 300 | /////////////////////////////////////////////////////////////////////// | ||
| 301 | // | ||
| 302 | // typedefs for fixed-point fractional types. | ||
| 303 | // | ||
| 304 | // sfract7 should be interpreted as signed 128ths. | ||
| 305 | // fract8 should be interpreted as unsigned 256ths. | ||
| 306 | // sfract15 should be interpreted as signed 32768ths. | ||
| 307 | // fract16 should be interpreted as unsigned 65536ths. | ||
| 308 | // | ||
| 309 | // Example: if a fract8 has the value "64", that should be interpreted | ||
| 310 | // as 64/256ths, or one-quarter. | ||
| 311 | // | ||
| 312 | // | ||
| 313 | // fract8 range is 0 to 0.99609375 | ||
| 314 | // in steps of 0.00390625 | ||
| 315 | // | ||
| 316 | // sfract7 range is -0.9921875 to 0.9921875 | ||
| 317 | // in steps of 0.0078125 | ||
| 318 | // | ||
| 319 | // fract16 range is 0 to 0.99998474121 | ||
| 320 | // in steps of 0.00001525878 | ||
| 321 | // | ||
| 322 | // sfract15 range is -0.99996948242 to 0.99996948242 | ||
| 323 | // in steps of 0.00003051757 | ||
| 324 | // | ||
| 325 | |||
| 326 | /// ANSI unsigned short _Fract. range is 0 to 0.99609375 | ||
| 327 | /// in steps of 0.00390625 | ||
| 328 | typedef uint8_t fract8; ///< ANSI: unsigned short _Fract | ||
| 329 | |||
| 330 | /// ANSI: signed short _Fract. range is -0.9921875 to 0.9921875 | ||
| 331 | /// in steps of 0.0078125 | ||
| 332 | typedef int8_t sfract7; ///< ANSI: signed short _Fract | ||
| 333 | |||
| 334 | /// ANSI: unsigned _Fract. range is 0 to 0.99998474121 | ||
| 335 | /// in steps of 0.00001525878 | ||
| 336 | typedef uint16_t fract16; ///< ANSI: unsigned _Fract | ||
| 337 | |||
| 338 | /// ANSI: signed _Fract. range is -0.99996948242 to 0.99996948242 | ||
| 339 | /// in steps of 0.00003051757 | ||
| 340 | typedef int16_t sfract15; ///< ANSI: signed _Fract | ||
| 341 | |||
| 342 | |||
| 343 | // accumXY types should be interpreted as X bits of integer, | ||
| 344 | // and Y bits of fraction. | ||
| 345 | // E.g., accum88 has 8 bits of int, 8 bits of fraction | ||
| 346 | |||
| 347 | typedef uint16_t accum88; ///< ANSI: unsigned short _Accum. 8 bits int, 8 bits fraction | ||
| 348 | typedef int16_t saccum78; ///< ANSI: signed short _Accum. 7 bits int, 8 bits fraction | ||
| 349 | typedef uint32_t accum1616;///< ANSI: unsigned _Accum. 16 bits int, 16 bits fraction | ||
| 350 | typedef int32_t saccum1516;///< ANSI: signed _Accum. 15 bits int, 16 bits fraction | ||
| 351 | typedef uint16_t accum124; ///< no direct ANSI counterpart. 12 bits int, 4 bits fraction | ||
| 352 | typedef int32_t saccum114;///< no direct ANSI counterpart. 1 bit int, 14 bits fraction | ||
| 353 | |||
| 354 | |||
| 355 | |||
| 356 | #include "math8.h" | ||
| 357 | #include "scale8.h" | ||
| 358 | #include "random8.h" | ||
| 359 | #include "trig8.h" | ||
| 360 | |||
| 361 | /////////////////////////////////////////////////////////////////////// | ||
| 362 | |||
| 363 | |||
| 364 | |||
| 365 | |||
| 366 | |||
| 367 | |||
| 368 | |||
| 369 | /////////////////////////////////////////////////////////////////////// | ||
| 370 | // | ||
| 371 | // float-to-fixed and fixed-to-float conversions | ||
| 372 | // | ||
| 373 | // Note that anything involving a 'float' on AVR will be slower. | ||
| 374 | |||
| 375 | /// sfract15ToFloat: conversion from sfract15 fixed point to | ||
| 376 | /// IEEE754 32-bit float. | ||
| 377 | LIB8STATIC float sfract15ToFloat( sfract15 y) | ||
| 378 | { | ||
| 379 | return y / 32768.0; | ||
| 380 | } | ||
| 381 | |||
| 382 | /// conversion from IEEE754 float in the range (-1,1) | ||
| 383 | /// to 16-bit fixed point. Note that the extremes of | ||
| 384 | /// one and negative one are NOT representable. The | ||
| 385 | /// representable range is basically -0.99996948242 to 0.99996948242. | ||
| 386 | LIB8STATIC sfract15 floatToSfract15( float f) | ||
| 387 | { | ||
| 388 | return f * 32768.0; | ||
| 389 | } | ||
| 390 | |||
| 391 | |||
| 392 | |||
| 393 | /////////////////////////////////////////////////////////////////////// | ||
| 394 | // | ||
| 395 | // memmove8, memcpy8, and memset8: | ||
| 396 | // alternatives to memmove, memcpy, and memset that are | ||
| 397 | // faster on AVR than standard avr-libc 1.8 | ||
| 398 | |||
| 399 | #if defined(__AVR__) | ||
| 400 | void * memmove8( void * dst, const void * src, uint16_t num ); | ||
| 401 | void * memcpy8 ( void * dst, const void * src, uint16_t num ) __attribute__ ((noinline)); | ||
| 402 | void * memset8 ( void * ptr, uint8_t value, uint16_t num ) __attribute__ ((noinline)) ; | ||
| 403 | #else | ||
| 404 | // on non-AVR platforms, these names just call standard libc. | ||
| 405 | #define memmove8 memmove | ||
| 406 | #define memcpy8 memcpy | ||
| 407 | #define memset8 memset | ||
| 408 | #endif | ||
| 409 | |||
| 410 | |||
| 411 | /////////////////////////////////////////////////////////////////////// | ||
| 412 | // | ||
| 413 | // linear interpolation, such as could be used for Perlin noise, etc. | ||
| 414 | // | ||
| 415 | |||
| 416 | // A note on the structure of the lerp functions: | ||
| 417 | // The cases for b>a and b<=a are handled separately for | ||
| 418 | // speed: without knowing the relative order of a and b, | ||
| 419 | // the value (a-b) might overflow the width of a or b, | ||
| 420 | // and have to be promoted to a wider, slower type. | ||
| 421 | // To avoid that, we separate the two cases, and are able | ||
| 422 | // to do all the math in the same width as the arguments, | ||
| 423 | // which is much faster and smaller on AVR. | ||
| 424 | |||
| 425 | /// linear interpolation between two unsigned 8-bit values, | ||
| 426 | /// with 8-bit fraction | ||
| 427 | LIB8STATIC uint8_t lerp8by8( uint8_t a, uint8_t b, fract8 frac) | ||
| 428 | { | ||
| 429 | uint8_t result; | ||
| 430 | if( b > a) { | ||
| 431 | uint8_t delta = b - a; | ||
| 432 | uint8_t scaled = scale8( delta, frac); | ||
| 433 | result = a + scaled; | ||
| 434 | } else { | ||
| 435 | uint8_t delta = a - b; | ||
| 436 | uint8_t scaled = scale8( delta, frac); | ||
| 437 | result = a - scaled; | ||
| 438 | } | ||
| 439 | return result; | ||
| 440 | } | ||
| 441 | |||
| 442 | /// linear interpolation between two unsigned 16-bit values, | ||
| 443 | /// with 16-bit fraction | ||
| 444 | LIB8STATIC uint16_t lerp16by16( uint16_t a, uint16_t b, fract16 frac) | ||
| 445 | { | ||
| 446 | uint16_t result; | ||
| 447 | if( b > a ) { | ||
| 448 | uint16_t delta = b - a; | ||
| 449 | uint16_t scaled = scale16(delta, frac); | ||
| 450 | result = a + scaled; | ||
| 451 | } else { | ||
| 452 | uint16_t delta = a - b; | ||
| 453 | uint16_t scaled = scale16( delta, frac); | ||
| 454 | result = a - scaled; | ||
| 455 | } | ||
| 456 | return result; | ||
| 457 | } | ||
| 458 | |||
| 459 | /// linear interpolation between two unsigned 16-bit values, | ||
| 460 | /// with 8-bit fraction | ||
| 461 | LIB8STATIC uint16_t lerp16by8( uint16_t a, uint16_t b, fract8 frac) | ||
| 462 | { | ||
| 463 | uint16_t result; | ||
| 464 | if( b > a) { | ||
| 465 | uint16_t delta = b - a; | ||
| 466 | uint16_t scaled = scale16by8( delta, frac); | ||
| 467 | result = a + scaled; | ||
| 468 | } else { | ||
| 469 | uint16_t delta = a - b; | ||
| 470 | uint16_t scaled = scale16by8( delta, frac); | ||
| 471 | result = a - scaled; | ||
| 472 | } | ||
| 473 | return result; | ||
| 474 | } | ||
| 475 | |||
| 476 | /// linear interpolation between two signed 15-bit values, | ||
| 477 | /// with 8-bit fraction | ||
| 478 | LIB8STATIC int16_t lerp15by8( int16_t a, int16_t b, fract8 frac) | ||
| 479 | { | ||
| 480 | int16_t result; | ||
| 481 | if( b > a) { | ||
| 482 | uint16_t delta = b - a; | ||
| 483 | uint16_t scaled = scale16by8( delta, frac); | ||
| 484 | result = a + scaled; | ||
| 485 | } else { | ||
| 486 | uint16_t delta = a - b; | ||
| 487 | uint16_t scaled = scale16by8( delta, frac); | ||
| 488 | result = a - scaled; | ||
| 489 | } | ||
| 490 | return result; | ||
| 491 | } | ||
| 492 | |||
| 493 | /// linear interpolation between two signed 15-bit values, | ||
| 494 | /// with 16-bit fraction | ||
| 495 | LIB8STATIC int16_t lerp15by16( int16_t a, int16_t b, fract16 frac) | ||
| 496 | { | ||
| 497 | int16_t result; | ||
| 498 | if( b > a) { | ||
| 499 | uint16_t delta = b - a; | ||
| 500 | uint16_t scaled = scale16( delta, frac); | ||
| 501 | result = a + scaled; | ||
| 502 | } else { | ||
| 503 | uint16_t delta = a - b; | ||
| 504 | uint16_t scaled = scale16( delta, frac); | ||
| 505 | result = a - scaled; | ||
| 506 | } | ||
| 507 | return result; | ||
| 508 | } | ||
| 509 | |||
| 510 | /// map8: map from one full-range 8-bit value into a narrower | ||
| 511 | /// range of 8-bit values, possibly a range of hues. | ||
| 512 | /// | ||
| 513 | /// E.g. map myValue into a hue in the range blue..purple..pink..red | ||
| 514 | /// hue = map8( myValue, HUE_BLUE, HUE_RED); | ||
| 515 | /// | ||
| 516 | /// Combines nicely with the waveform functions (like sin8, etc) | ||
| 517 | /// to produce continuous hue gradients back and forth: | ||
| 518 | /// | ||
| 519 | /// hue = map8( sin8( myValue), HUE_BLUE, HUE_RED); | ||
| 520 | /// | ||
| 521 | /// Mathematically similar to lerp8by8, but arguments are more | ||
| 522 | /// like Arduino's "map"; this function is similar to | ||
| 523 | /// | ||
| 524 | /// map( in, 0, 255, rangeStart, rangeEnd) | ||
| 525 | /// | ||
| 526 | /// but faster and specifically designed for 8-bit values. | ||
| 527 | LIB8STATIC uint8_t map8( uint8_t in, uint8_t rangeStart, uint8_t rangeEnd) | ||
| 528 | { | ||
| 529 | uint8_t rangeWidth = rangeEnd - rangeStart; | ||
| 530 | uint8_t out = scale8( in, rangeWidth); | ||
| 531 | out += rangeStart; | ||
| 532 | return out; | ||
| 533 | } | ||
| 534 | |||
| 535 | |||
| 536 | /////////////////////////////////////////////////////////////////////// | ||
| 537 | // | ||
| 538 | // easing functions; see http://easings.net | ||
| 539 | // | ||
| 540 | |||
| 541 | /// ease8InOutQuad: 8-bit quadratic ease-in / ease-out function | ||
| 542 | /// Takes around 13 cycles on AVR | ||
| 543 | #if EASE8_C == 1 | ||
| 544 | LIB8STATIC uint8_t ease8InOutQuad( uint8_t i) | ||
| 545 | { | ||
| 546 | uint8_t j = i; | ||
| 547 | if( j & 0x80 ) { | ||
| 548 | j = 255 - j; | ||
| 549 | } | ||
| 550 | uint8_t jj = scale8( j, j); | ||
| 551 | uint8_t jj2 = jj << 1; | ||
| 552 | if( i & 0x80 ) { | ||
| 553 | jj2 = 255 - jj2; | ||
| 554 | } | ||
| 555 | return jj2; | ||
| 556 | } | ||
| 557 | |||
| 558 | #elif EASE8_AVRASM == 1 | ||
| 559 | // This AVR asm version of ease8InOutQuad preserves one more | ||
| 560 | // low-bit of precision than the C version, and is also slightly | ||
| 561 | // smaller and faster. | ||
| 562 | LIB8STATIC uint8_t ease8InOutQuad(uint8_t val) { | ||
| 563 | uint8_t j=val; | ||
| 564 | asm volatile ( | ||
| 565 | "sbrc %[val], 7 \n" | ||
| 566 | "com %[j] \n" | ||
| 567 | "mul %[j], %[j] \n" | ||
| 568 | "add r0, %[j] \n" | ||
| 569 | "ldi %[j], 0 \n" | ||
| 570 | "adc %[j], r1 \n" | ||
| 571 | "lsl r0 \n" // carry = high bit of low byte of mul product | ||
| 572 | "rol %[j] \n" // j = (j * 2) + carry // preserve add'l bit of precision | ||
| 573 | "sbrc %[val], 7 \n" | ||
| 574 | "com %[j] \n" | ||
| 575 | "clr __zero_reg__ \n" | ||
| 576 | : [j] "+&a" (j) | ||
| 577 | : [val] "a" (val) | ||
| 578 | : "r0", "r1" | ||
| 579 | ); | ||
| 580 | return j; | ||
| 581 | } | ||
| 582 | |||
| 583 | #else | ||
| 584 | #error "No implementation for ease8InOutQuad available." | ||
| 585 | #endif | ||
| 586 | |||
| 587 | /// ease16InOutQuad: 16-bit quadratic ease-in / ease-out function | ||
| 588 | // C implementation at this point | ||
| 589 | LIB8STATIC uint16_t ease16InOutQuad( uint16_t i) | ||
| 590 | { | ||
| 591 | uint16_t j = i; | ||
| 592 | if( j & 0x8000 ) { | ||
| 593 | j = 65535 - j; | ||
| 594 | } | ||
| 595 | uint16_t jj = scale16( j, j); | ||
| 596 | uint16_t jj2 = jj << 1; | ||
| 597 | if( i & 0x8000 ) { | ||
| 598 | jj2 = 65535 - jj2; | ||
| 599 | } | ||
| 600 | return jj2; | ||
| 601 | } | ||
| 602 | |||
| 603 | |||
| 604 | /// ease8InOutCubic: 8-bit cubic ease-in / ease-out function | ||
| 605 | /// Takes around 18 cycles on AVR | ||
| 606 | LIB8STATIC fract8 ease8InOutCubic( fract8 i) | ||
| 607 | { | ||
| 608 | uint8_t ii = scale8_LEAVING_R1_DIRTY( i, i); | ||
| 609 | uint8_t iii = scale8_LEAVING_R1_DIRTY( ii, i); | ||
| 610 | |||
| 611 | uint16_t r1 = (3 * (uint16_t)(ii)) - ( 2 * (uint16_t)(iii)); | ||
| 612 | |||
| 613 | /* the code generated for the above *'s automatically | ||
| 614 | cleans up R1, so there's no need to explicitly call | ||
| 615 | cleanup_R1(); */ | ||
| 616 | |||
| 617 | uint8_t result = r1; | ||
| 618 | |||
| 619 | // if we got "256", return 255: | ||
| 620 | if( r1 & 0x100 ) { | ||
| 621 | result = 255; | ||
| 622 | } | ||
| 623 | return result; | ||
| 624 | } | ||
| 625 | |||
| 626 | /// ease8InOutApprox: fast, rough 8-bit ease-in/ease-out function | ||
| 627 | /// shaped approximately like 'ease8InOutCubic', | ||
| 628 | /// it's never off by more than a couple of percent | ||
| 629 | /// from the actual cubic S-curve, and it executes | ||
| 630 | /// more than twice as fast. Use when the cycles | ||
| 631 | /// are more important than visual smoothness. | ||
| 632 | /// Asm version takes around 7 cycles on AVR. | ||
| 633 | |||
| 634 | #if EASE8_C == 1 | ||
| 635 | LIB8STATIC fract8 ease8InOutApprox( fract8 i) | ||
| 636 | { | ||
| 637 | if( i < 64) { | ||
| 638 | // start with slope 0.5 | ||
| 639 | i /= 2; | ||
| 640 | } else if( i > (255 - 64)) { | ||
| 641 | // end with slope 0.5 | ||
| 642 | i = 255 - i; | ||
| 643 | i /= 2; | ||
| 644 | i = 255 - i; | ||
| 645 | } else { | ||
| 646 | // in the middle, use slope 192/128 = 1.5 | ||
| 647 | i -= 64; | ||
| 648 | i += (i / 2); | ||
| 649 | i += 32; | ||
| 650 | } | ||
| 651 | |||
| 652 | return i; | ||
| 653 | } | ||
| 654 | |||
| 655 | #elif EASE8_AVRASM == 1 | ||
| 656 | LIB8STATIC uint8_t ease8InOutApprox( fract8 i) | ||
| 657 | { | ||
| 658 | // takes around 7 cycles on AVR | ||
| 659 | asm volatile ( | ||
| 660 | " subi %[i], 64 \n\t" | ||
| 661 | " cpi %[i], 128 \n\t" | ||
| 662 | " brcc Lshift_%= \n\t" | ||
| 663 | |||
| 664 | // middle case | ||
| 665 | " mov __tmp_reg__, %[i] \n\t" | ||
| 666 | " lsr __tmp_reg__ \n\t" | ||
| 667 | " add %[i], __tmp_reg__ \n\t" | ||
| 668 | " subi %[i], 224 \n\t" | ||
| 669 | " rjmp Ldone_%= \n\t" | ||
| 670 | |||
| 671 | // start or end case | ||
| 672 | "Lshift_%=: \n\t" | ||
| 673 | " lsr %[i] \n\t" | ||
| 674 | " subi %[i], 96 \n\t" | ||
| 675 | |||
| 676 | "Ldone_%=: \n\t" | ||
| 677 | |||
| 678 | : [i] "+&a" (i) | ||
| 679 | : | ||
| 680 | : "r0", "r1" | ||
| 681 | ); | ||
| 682 | return i; | ||
| 683 | } | ||
| 684 | #else | ||
| 685 | #error "No implementation for ease8 available." | ||
| 686 | #endif | ||
| 687 | |||
| 688 | |||
| 689 | |||
| 690 | /// triwave8: triangle (sawtooth) wave generator. Useful for | ||
| 691 | /// turning a one-byte ever-increasing value into a | ||
| 692 | /// one-byte value that oscillates up and down. | ||
| 693 | /// | ||
| 694 | /// input output | ||
| 695 | /// 0..127 0..254 (positive slope) | ||
| 696 | /// 128..255 254..0 (negative slope) | ||
| 697 | /// | ||
| 698 | /// On AVR this function takes just three cycles. | ||
| 699 | /// | ||
| 700 | LIB8STATIC uint8_t triwave8(uint8_t in) | ||
| 701 | { | ||
| 702 | if( in & 0x80) { | ||
| 703 | in = 255 - in; | ||
| 704 | } | ||
| 705 | uint8_t out = in << 1; | ||
| 706 | return out; | ||
| 707 | } | ||
| 708 | |||
| 709 | |||
| 710 | // quadwave8 and cubicwave8: S-shaped wave generators (like 'sine'). | ||
| 711 | // Useful for turning a one-byte 'counter' value into a | ||
| 712 | // one-byte oscillating value that moves smoothly up and down, | ||
| 713 | // with an 'acceleration' and 'deceleration' curve. | ||
| 714 | // | ||
| 715 | // These are even faster than 'sin8', and have | ||
| 716 | // slightly different curve shapes. | ||
| 717 | // | ||
| 718 | |||
| 719 | /// quadwave8: quadratic waveform generator. Spends just a little more | ||
| 720 | /// time at the limits than 'sine' does. | ||
| 721 | LIB8STATIC uint8_t quadwave8(uint8_t in) | ||
| 722 | { | ||
| 723 | return ease8InOutQuad( triwave8( in)); | ||
| 724 | } | ||
| 725 | |||
| 726 | /// cubicwave8: cubic waveform generator. Spends visibly more time | ||
| 727 | /// at the limits than 'sine' does. | ||
| 728 | LIB8STATIC uint8_t cubicwave8(uint8_t in) | ||
| 729 | { | ||
| 730 | return ease8InOutCubic( triwave8( in)); | ||
| 731 | } | ||
| 732 | |||
| 733 | /// squarewave8: square wave generator. Useful for | ||
| 734 | /// turning a one-byte ever-increasing value | ||
| 735 | /// into a one-byte value that is either 0 or 255. | ||
| 736 | /// The width of the output 'pulse' is | ||
| 737 | /// determined by the pulsewidth argument: | ||
| 738 | /// | ||
| 739 | ///~~~ | ||
| 740 | /// If pulsewidth is 255, output is always 255. | ||
| 741 | /// If pulsewidth < 255, then | ||
| 742 | /// if input < pulsewidth then output is 255 | ||
| 743 | /// if input >= pulsewidth then output is 0 | ||
| 744 | ///~~~ | ||
| 745 | /// | ||
| 746 | /// the output looking like: | ||
| 747 | /// | ||
| 748 | ///~~~ | ||
| 749 | /// 255 +--pulsewidth--+ | ||
| 750 | /// . | | | ||
| 751 | /// 0 0 +--------(256-pulsewidth)-------- | ||
| 752 | ///~~~ | ||
| 753 | /// | ||
| 754 | /// @param in | ||
| 755 | /// @param pulsewidth | ||
| 756 | /// @returns square wave output | ||
| 757 | LIB8STATIC uint8_t squarewave8( uint8_t in, uint8_t pulsewidth) | ||
| 758 | { | ||
| 759 | if( in < pulsewidth || (pulsewidth == 255)) { | ||
| 760 | return 255; | ||
| 761 | } else { | ||
| 762 | return 0; | ||
| 763 | } | ||
| 764 | } | ||
| 765 | |||
| 766 | |||
| 767 | // Beat generators - These functions produce waves at a given | ||
| 768 | // number of 'beats per minute'. Internally, they use | ||
| 769 | // the Arduino function 'millis' to track elapsed time. | ||
| 770 | // Accuracy is a bit better than one part in a thousand. | ||
| 771 | // | ||
| 772 | // beat8( BPM ) returns an 8-bit value that cycles 'BPM' times | ||
| 773 | // per minute, rising from 0 to 255, resetting to zero, | ||
| 774 | // rising up again, etc. The output of this function | ||
| 775 | // is suitable for feeding directly into sin8, and cos8, | ||
| 776 | // triwave8, quadwave8, and cubicwave8. | ||
| 777 | // beat16( BPM ) returns a 16-bit value that cycles 'BPM' times | ||
| 778 | // per minute, rising from 0 to 65535, resetting to zero, | ||
| 779 | // rising up again, etc. The output of this function is | ||
| 780 | // suitable for feeding directly into sin16 and cos16. | ||
| 781 | // beat88( BPM88) is the same as beat16, except that the BPM88 argument | ||
| 782 | // MUST be in Q8.8 fixed point format, e.g. 120BPM must | ||
| 783 | // be specified as 120*256 = 30720. | ||
| 784 | // beatsin8( BPM, uint8_t low, uint8_t high) returns an 8-bit value that | ||
| 785 | // rises and falls in a sine wave, 'BPM' times per minute, | ||
| 786 | // between the values of 'low' and 'high'. | ||
| 787 | // beatsin16( BPM, uint16_t low, uint16_t high) returns a 16-bit value | ||
| 788 | // that rises and falls in a sine wave, 'BPM' times per | ||
| 789 | // minute, between the values of 'low' and 'high'. | ||
| 790 | // beatsin88( BPM88, ...) is the same as beatsin16, except that the | ||
| 791 | // BPM88 argument MUST be in Q8.8 fixed point format, | ||
| 792 | // e.g. 120BPM must be specified as 120*256 = 30720. | ||
| 793 | // | ||
| 794 | // BPM can be supplied two ways. The simpler way of specifying BPM is as | ||
| 795 | // a simple 8-bit integer from 1-255, (e.g., "120"). | ||
| 796 | // The more sophisticated way of specifying BPM allows for fractional | ||
| 797 | // "Q8.8" fixed point number (an 'accum88') with an 8-bit integer part and | ||
| 798 | // an 8-bit fractional part. The easiest way to construct this is to multiply | ||
| 799 | // a floating point BPM value (e.g. 120.3) by 256, (e.g. resulting in 30796 | ||
| 800 | // in this case), and pass that as the 16-bit BPM argument. | ||
| 801 | // "BPM88" MUST always be specified in Q8.8 format. | ||
| 802 | // | ||
| 803 | // Originally designed to make an entire animation project pulse with brightness. | ||
| 804 | // For that effect, add this line just above your existing call to "FastLED.show()": | ||
| 805 | // | ||
| 806 | // uint8_t bright = beatsin8( 60 /*BPM*/, 192 /*dimmest*/, 255 /*brightest*/ ); | ||
| 807 | // FastLED.setBrightness( bright ); | ||
| 808 | // FastLED.show(); | ||
| 809 | // | ||
| 810 | // The entire animation will now pulse between brightness 192 and 255 once per second. | ||
| 811 | |||
| 812 | |||
| 813 | // The beat generators need access to a millisecond counter. | ||
| 814 | // On Arduino, this is "millis()". On other platforms, you'll | ||
| 815 | // need to provide a function with this signature: | ||
| 816 | // uint32_t get_millisecond_timer(); | ||
| 817 | // that provides similar functionality. | ||
| 818 | // You can also force use of the get_millisecond_timer function | ||
| 819 | // by #defining USE_GET_MILLISECOND_TIMER. | ||
| 820 | #if (defined(ARDUINO) || defined(SPARK) || defined(FASTLED_HAS_MILLIS)) && !defined(USE_GET_MILLISECOND_TIMER) | ||
| 821 | // Forward declaration of Arduino function 'millis'. | ||
| 822 | //uint32_t millis(); | ||
| 823 | #define GET_MILLIS millis | ||
| 824 | #else | ||
| 825 | uint32_t get_millisecond_timer(void); | ||
| 826 | #define GET_MILLIS get_millisecond_timer | ||
| 827 | #endif | ||
| 828 | |||
| 829 | /// beat88 generates a 16-bit 'sawtooth' wave at a given BPM, | ||
| 830 | /// with BPM specified in Q8.8 fixed-point format; e.g. | ||
| 831 | /// for this function, 120 BPM MUST BE specified as | ||
| 832 | /// 120*256 = 30720. | ||
| 833 | /// If you just want to specify "120", use beat16 or beat8. | ||
| 834 | LIB8STATIC uint16_t beat88( accum88 beats_per_minute_88, uint32_t timebase) | ||
| 835 | { | ||
| 836 | // BPM is 'beats per minute', or 'beats per 60000ms'. | ||
| 837 | // To avoid using the (slower) division operator, we | ||
| 838 | // want to convert 'beats per 60000ms' to 'beats per 65536ms', | ||
| 839 | // and then use a simple, fast bit-shift to divide by 65536. | ||
| 840 | // | ||
| 841 | // The ratio 65536:60000 is 279.620266667:256; we'll call it 280:256. | ||
| 842 | // The conversion is accurate to about 0.14%, more or less, | ||
| 843 | // e.g. if you ask for "120 BPM", you'll get about "120.16". | ||
| 844 | return (((GET_MILLIS()) - timebase) * beats_per_minute_88 * 280) >> 16; | ||
| 845 | } | ||
| 846 | |||
| 847 | /// beat16 generates a 16-bit 'sawtooth' wave at a given BPM | ||
| 848 | LIB8STATIC uint16_t beat16( accum88 beats_per_minute, uint32_t timebase) | ||
| 849 | { | ||
| 850 | // Convert simple 8-bit BPM's to full Q8.8 accum88's if needed | ||
| 851 | if( beats_per_minute < 256) beats_per_minute <<= 8; | ||
| 852 | return beat88(beats_per_minute, timebase); | ||
| 853 | } | ||
| 854 | |||
| 855 | /// beat8 generates an 8-bit 'sawtooth' wave at a given BPM | ||
| 856 | LIB8STATIC uint8_t beat8( accum88 beats_per_minute, uint32_t timebase) | ||
| 857 | { | ||
| 858 | return beat16( beats_per_minute, timebase) >> 8; | ||
| 859 | } | ||
| 860 | |||
| 861 | /// beatsin88 generates a 16-bit sine wave at a given BPM, | ||
| 862 | /// that oscillates within a given range. | ||
| 863 | /// For this function, BPM MUST BE SPECIFIED as | ||
| 864 | /// a Q8.8 fixed-point value; e.g. 120BPM must be | ||
| 865 | /// specified as 120*256 = 30720. | ||
| 866 | /// If you just want to specify "120", use beatsin16 or beatsin8. | ||
| 867 | LIB8STATIC uint16_t beatsin88( accum88 beats_per_minute_88, uint16_t lowest, uint16_t highest, uint32_t timebase, uint16_t phase_offset) | ||
| 868 | { | ||
| 869 | uint16_t beat = beat88( beats_per_minute_88, timebase); | ||
| 870 | uint16_t beatsin = (sin16( beat + phase_offset) + 32768); | ||
| 871 | uint16_t rangewidth = highest - lowest; | ||
| 872 | uint16_t scaledbeat = scale16( beatsin, rangewidth); | ||
| 873 | uint16_t result = lowest + scaledbeat; | ||
| 874 | return result; | ||
| 875 | } | ||
| 876 | |||
| 877 | /// beatsin16 generates a 16-bit sine wave at a given BPM, | ||
| 878 | /// that oscillates within a given range. | ||
| 879 | LIB8STATIC uint16_t beatsin16(accum88 beats_per_minute, uint16_t lowest, uint16_t highest, uint32_t timebase, uint16_t phase_offset) | ||
| 880 | { | ||
| 881 | uint16_t beat = beat16( beats_per_minute, timebase); | ||
| 882 | uint16_t beatsin = (sin16( beat + phase_offset) + 32768); | ||
| 883 | uint16_t rangewidth = highest - lowest; | ||
| 884 | uint16_t scaledbeat = scale16( beatsin, rangewidth); | ||
| 885 | uint16_t result = lowest + scaledbeat; | ||
| 886 | return result; | ||
| 887 | } | ||
| 888 | |||
| 889 | /// beatsin8 generates an 8-bit sine wave at a given BPM, | ||
| 890 | /// that oscillates within a given range. | ||
| 891 | LIB8STATIC uint8_t beatsin8( accum88 beats_per_minute, uint8_t lowest, uint8_t highest, uint32_t timebase, uint8_t phase_offset) | ||
| 892 | { | ||
| 893 | uint8_t beat = beat8( beats_per_minute, timebase); | ||
| 894 | uint8_t beatsin = sin8( beat + phase_offset); | ||
| 895 | uint8_t rangewidth = highest - lowest; | ||
| 896 | uint8_t scaledbeat = scale8( beatsin, rangewidth); | ||
| 897 | uint8_t result = lowest + scaledbeat; | ||
| 898 | return result; | ||
| 899 | } | ||
| 900 | |||
| 901 | |||
| 902 | /// Return the current seconds since boot in a 16-bit value. Used as part of the | ||
| 903 | /// "every N time-periods" mechanism | ||
| 904 | LIB8STATIC uint16_t seconds16(void) | ||
| 905 | { | ||
| 906 | uint32_t ms = GET_MILLIS(); | ||
| 907 | uint16_t s16; | ||
| 908 | s16 = ms / 1000; | ||
| 909 | return s16; | ||
| 910 | } | ||
| 911 | |||
| 912 | /// Return the current minutes since boot in a 16-bit value. Used as part of the | ||
| 913 | /// "every N time-periods" mechanism | ||
| 914 | LIB8STATIC uint16_t minutes16(void) | ||
| 915 | { | ||
| 916 | uint32_t ms = GET_MILLIS(); | ||
| 917 | uint16_t m16; | ||
| 918 | m16 = (ms / (60000L)) & 0xFFFF; | ||
| 919 | return m16; | ||
| 920 | } | ||
| 921 | |||
| 922 | /// Return the current hours since boot in an 8-bit value. Used as part of the | ||
| 923 | /// "every N time-periods" mechanism | ||
| 924 | LIB8STATIC uint8_t hours8(void) | ||
| 925 | { | ||
| 926 | uint32_t ms = GET_MILLIS(); | ||
| 927 | uint8_t h8; | ||
| 928 | h8 = (ms / (3600000L)) & 0xFF; | ||
| 929 | return h8; | ||
| 930 | } | ||
| 931 | |||
| 932 | ///@} | ||
| 933 | |||
| 934 | #endif | ||
diff --git a/lib/lib8tion/math8.h b/lib/lib8tion/math8.h new file mode 100644 index 000000000..8c6b6c227 --- /dev/null +++ b/lib/lib8tion/math8.h | |||
| @@ -0,0 +1,552 @@ | |||
| 1 | #ifndef __INC_LIB8TION_MATH_H | ||
| 2 | #define __INC_LIB8TION_MATH_H | ||
| 3 | |||
| 4 | #include "scale8.h" | ||
| 5 | |||
| 6 | ///@ingroup lib8tion | ||
| 7 | |||
| 8 | ///@defgroup Math Basic math operations | ||
| 9 | /// Fast, efficient 8-bit math functions specifically | ||
| 10 | /// designed for high-performance LED programming. | ||
| 11 | /// | ||
| 12 | /// Because of the AVR(Arduino) and ARM assembly language | ||
| 13 | /// implementations provided, using these functions often | ||
| 14 | /// results in smaller and faster code than the equivalent | ||
| 15 | /// program using plain "C" arithmetic and logic. | ||
| 16 | ///@{ | ||
| 17 | |||
| 18 | |||
| 19 | /// add one byte to another, saturating at 0xFF | ||
| 20 | /// @param i - first byte to add | ||
| 21 | /// @param j - second byte to add | ||
| 22 | /// @returns the sum of i & j, capped at 0xFF | ||
| 23 | LIB8STATIC_ALWAYS_INLINE uint8_t qadd8( uint8_t i, uint8_t j) | ||
| 24 | { | ||
| 25 | #if QADD8_C == 1 | ||
| 26 | uint16_t t = i + j; | ||
| 27 | if (t > 255) t = 255; | ||
| 28 | return t; | ||
| 29 | #elif QADD8_AVRASM == 1 | ||
| 30 | asm volatile( | ||
| 31 | /* First, add j to i, conditioning the C flag */ | ||
| 32 | "add %0, %1 \n\t" | ||
| 33 | |||
| 34 | /* Now test the C flag. | ||
| 35 | If C is clear, we branch around a load of 0xFF into i. | ||
| 36 | If C is set, we go ahead and load 0xFF into i. | ||
| 37 | */ | ||
| 38 | "brcc L_%= \n\t" | ||
| 39 | "ldi %0, 0xFF \n\t" | ||
| 40 | "L_%=: " | ||
| 41 | : "+a" (i) | ||
| 42 | : "a" (j) ); | ||
| 43 | return i; | ||
| 44 | #elif QADD8_ARM_DSP_ASM == 1 | ||
| 45 | asm volatile( "uqadd8 %0, %0, %1" : "+r" (i) : "r" (j)); | ||
| 46 | return i; | ||
| 47 | #else | ||
| 48 | #error "No implementation for qadd8 available." | ||
| 49 | #endif | ||
| 50 | } | ||
| 51 | |||
| 52 | /// Add one byte to another, saturating at 0x7F | ||
| 53 | /// @param i - first byte to add | ||
| 54 | /// @param j - second byte to add | ||
| 55 | /// @returns the sum of i & j, capped at 0x7F | ||
| 56 | LIB8STATIC_ALWAYS_INLINE int8_t qadd7( int8_t i, int8_t j) | ||
| 57 | { | ||
| 58 | #if QADD7_C == 1 | ||
| 59 | int16_t t = i + j; | ||
| 60 | if (t > 127) t = 127; | ||
| 61 | return t; | ||
| 62 | #elif QADD7_AVRASM == 1 | ||
| 63 | asm volatile( | ||
| 64 | /* First, add j to i, conditioning the V flag */ | ||
| 65 | "add %0, %1 \n\t" | ||
| 66 | |||
| 67 | /* Now test the V flag. | ||
| 68 | If V is clear, we branch around a load of 0x7F into i. | ||
| 69 | If V is set, we go ahead and load 0x7F into i. | ||
| 70 | */ | ||
| 71 | "brvc L_%= \n\t" | ||
| 72 | "ldi %0, 0x7F \n\t" | ||
| 73 | "L_%=: " | ||
| 74 | : "+a" (i) | ||
| 75 | : "a" (j) ); | ||
| 76 | |||
| 77 | return i; | ||
| 78 | #elif QADD7_ARM_DSP_ASM == 1 | ||
| 79 | asm volatile( "qadd8 %0, %0, %1" : "+r" (i) : "r" (j)); | ||
| 80 | return i; | ||
| 81 | #else | ||
| 82 | #error "No implementation for qadd7 available." | ||
| 83 | #endif | ||
| 84 | } | ||
| 85 | |||
| 86 | /// subtract one byte from another, saturating at 0x00 | ||
| 87 | /// @returns i - j with a floor of 0 | ||
| 88 | LIB8STATIC_ALWAYS_INLINE uint8_t qsub8( uint8_t i, uint8_t j) | ||
| 89 | { | ||
| 90 | #if QSUB8_C == 1 | ||
| 91 | int16_t t = i - j; | ||
| 92 | if (t < 0) t = 0; | ||
| 93 | return t; | ||
| 94 | #elif QSUB8_AVRASM == 1 | ||
| 95 | |||
| 96 | asm volatile( | ||
| 97 | /* First, subtract j from i, conditioning the C flag */ | ||
| 98 | "sub %0, %1 \n\t" | ||
| 99 | |||
| 100 | /* Now test the C flag. | ||
| 101 | If C is clear, we branch around a load of 0x00 into i. | ||
| 102 | If C is set, we go ahead and load 0x00 into i. | ||
| 103 | */ | ||
| 104 | "brcc L_%= \n\t" | ||
| 105 | "ldi %0, 0x00 \n\t" | ||
| 106 | "L_%=: " | ||
| 107 | : "+a" (i) | ||
| 108 | : "a" (j) ); | ||
| 109 | |||
| 110 | return i; | ||
| 111 | #else | ||
| 112 | #error "No implementation for qsub8 available." | ||
| 113 | #endif | ||
| 114 | } | ||
| 115 | |||
| 116 | /// add one byte to another, with one byte result | ||
| 117 | LIB8STATIC_ALWAYS_INLINE uint8_t add8( uint8_t i, uint8_t j) | ||
| 118 | { | ||
| 119 | #if ADD8_C == 1 | ||
| 120 | uint16_t t = i + j; | ||
| 121 | return t; | ||
| 122 | #elif ADD8_AVRASM == 1 | ||
| 123 | // Add j to i, period. | ||
| 124 | asm volatile( "add %0, %1" : "+a" (i) : "a" (j)); | ||
| 125 | return i; | ||
| 126 | #else | ||
| 127 | #error "No implementation for add8 available." | ||
| 128 | #endif | ||
| 129 | } | ||
| 130 | |||
| 131 | /// add one byte to two bytes, with a two byte result | ||
| 132 | LIB8STATIC_ALWAYS_INLINE uint16_t add8to16( uint8_t i, uint16_t j) | ||
| 133 | { | ||
| 134 | #if ADD8_C == 1 | ||
| 135 | uint16_t t = i + j; | ||
| 136 | return t; | ||
| 137 | #elif ADD8_AVRASM == 1 | ||
| 138 | // Add i(one byte) to j(two bytes) | ||
| 139 | asm volatile( "add %A[j], %[i] \n\t" | ||
| 140 | "adc %B[j], __zero_reg__ \n\t" | ||
| 141 | : [j] "+a" (j) | ||
| 142 | : [i] "a" (i) | ||
| 143 | ); | ||
| 144 | return i; | ||
| 145 | #else | ||
| 146 | #error "No implementation for add8to16 available." | ||
| 147 | #endif | ||
| 148 | } | ||
| 149 | |||
| 150 | |||
| 151 | /// subtract one byte from another, 8-bit result | ||
| 152 | LIB8STATIC_ALWAYS_INLINE uint8_t sub8( uint8_t i, uint8_t j) | ||
| 153 | { | ||
| 154 | #if SUB8_C == 1 | ||
| 155 | int16_t t = i - j; | ||
| 156 | return t; | ||
| 157 | #elif SUB8_AVRASM == 1 | ||
| 158 | // Subtract j from i, period. | ||
| 159 | asm volatile( "sub %0, %1" : "+a" (i) : "a" (j)); | ||
| 160 | return i; | ||
| 161 | #else | ||
| 162 | #error "No implementation for sub8 available." | ||
| 163 | #endif | ||
| 164 | } | ||
| 165 | |||
| 166 | /// Calculate an integer average of two unsigned | ||
| 167 | /// 8-bit integer values (uint8_t). | ||
| 168 | /// Fractional results are rounded down, e.g. avg8(20,41) = 30 | ||
| 169 | LIB8STATIC_ALWAYS_INLINE uint8_t avg8( uint8_t i, uint8_t j) | ||
| 170 | { | ||
| 171 | #if AVG8_C == 1 | ||
| 172 | return (i + j) >> 1; | ||
| 173 | #elif AVG8_AVRASM == 1 | ||
| 174 | asm volatile( | ||
| 175 | /* First, add j to i, 9th bit overflows into C flag */ | ||
| 176 | "add %0, %1 \n\t" | ||
| 177 | /* Divide by two, moving C flag into high 8th bit */ | ||
| 178 | "ror %0 \n\t" | ||
| 179 | : "+a" (i) | ||
| 180 | : "a" (j) ); | ||
| 181 | return i; | ||
| 182 | #else | ||
| 183 | #error "No implementation for avg8 available." | ||
| 184 | #endif | ||
| 185 | } | ||
| 186 | |||
| 187 | /// Calculate an integer average of two unsigned | ||
| 188 | /// 16-bit integer values (uint16_t). | ||
| 189 | /// Fractional results are rounded down, e.g. avg16(20,41) = 30 | ||
| 190 | LIB8STATIC_ALWAYS_INLINE uint16_t avg16( uint16_t i, uint16_t j) | ||
| 191 | { | ||
| 192 | #if AVG16_C == 1 | ||
| 193 | return (uint32_t)((uint32_t)(i) + (uint32_t)(j)) >> 1; | ||
| 194 | #elif AVG16_AVRASM == 1 | ||
| 195 | asm volatile( | ||
| 196 | /* First, add jLo (heh) to iLo, 9th bit overflows into C flag */ | ||
| 197 | "add %A[i], %A[j] \n\t" | ||
| 198 | /* Now, add C + jHi to iHi, 17th bit overflows into C flag */ | ||
| 199 | "adc %B[i], %B[j] \n\t" | ||
| 200 | /* Divide iHi by two, moving C flag into high 16th bit, old 9th bit now in C */ | ||
| 201 | "ror %B[i] \n\t" | ||
| 202 | /* Divide iLo by two, moving C flag into high 8th bit */ | ||
| 203 | "ror %A[i] \n\t" | ||
| 204 | : [i] "+a" (i) | ||
| 205 | : [j] "a" (j) ); | ||
| 206 | return i; | ||
| 207 | #else | ||
| 208 | #error "No implementation for avg16 available." | ||
| 209 | #endif | ||
| 210 | } | ||
| 211 | |||
| 212 | |||
| 213 | /// Calculate an integer average of two signed 7-bit | ||
| 214 | /// integers (int8_t) | ||
| 215 | /// If the first argument is even, result is rounded down. | ||
| 216 | /// If the first argument is odd, result is rounded up. | ||
| 217 | LIB8STATIC_ALWAYS_INLINE int8_t avg7( int8_t i, int8_t j) | ||
| 218 | { | ||
| 219 | #if AVG7_C == 1 | ||
| 220 | return ((i + j) >> 1) + (i & 0x1); | ||
| 221 | #elif AVG7_AVRASM == 1 | ||
| 222 | asm volatile( | ||
| 223 | "asr %1 \n\t" | ||
| 224 | "asr %0 \n\t" | ||
| 225 | "adc %0, %1 \n\t" | ||
| 226 | : "+a" (i) | ||
| 227 | : "a" (j) ); | ||
| 228 | return i; | ||
| 229 | #else | ||
| 230 | #error "No implementation for avg7 available." | ||
| 231 | #endif | ||
| 232 | } | ||
| 233 | |||
| 234 | /// Calculate an integer average of two signed 15-bit | ||
| 235 | /// integers (int16_t) | ||
| 236 | /// If the first argument is even, result is rounded down. | ||
| 237 | /// If the first argument is odd, result is rounded up. | ||
| 238 | LIB8STATIC_ALWAYS_INLINE int16_t avg15( int16_t i, int16_t j) | ||
| 239 | { | ||
| 240 | #if AVG15_C == 1 | ||
| 241 | return ((int32_t)((int32_t)(i) + (int32_t)(j)) >> 1) + (i & 0x1); | ||
| 242 | #elif AVG15_AVRASM == 1 | ||
| 243 | asm volatile( | ||
| 244 | /* first divide j by 2, throwing away lowest bit */ | ||
| 245 | "asr %B[j] \n\t" | ||
| 246 | "ror %A[j] \n\t" | ||
| 247 | /* now divide i by 2, with lowest bit going into C */ | ||
| 248 | "asr %B[i] \n\t" | ||
| 249 | "ror %A[i] \n\t" | ||
| 250 | /* add j + C to i */ | ||
| 251 | "adc %A[i], %A[j] \n\t" | ||
| 252 | "adc %B[i], %B[j] \n\t" | ||
| 253 | : [i] "+a" (i) | ||
| 254 | : [j] "a" (j) ); | ||
| 255 | return i; | ||
| 256 | #else | ||
| 257 | #error "No implementation for avg15 available." | ||
| 258 | #endif | ||
| 259 | } | ||
| 260 | |||
| 261 | |||
| 262 | /// Calculate the remainder of one unsigned 8-bit | ||
| 263 | /// value divided by another, aka A % M. | ||
| 264 | /// Implemented by repeated subtraction, which is | ||
| 265 | /// very compact, and very fast if A is 'probably' | ||
| 266 | /// less than M. If A is a large multiple of M, | ||
| 267 | /// the loop has to execute multiple times. However, | ||
| 268 | /// even in that case, the loop is only two | ||
| 269 | /// instructions long on AVR, i.e., quick. | ||
| 270 | LIB8STATIC_ALWAYS_INLINE uint8_t mod8( uint8_t a, uint8_t m) | ||
| 271 | { | ||
| 272 | #if defined(__AVR__) | ||
| 273 | asm volatile ( | ||
| 274 | "L_%=: sub %[a],%[m] \n\t" | ||
| 275 | " brcc L_%= \n\t" | ||
| 276 | " add %[a],%[m] \n\t" | ||
| 277 | : [a] "+r" (a) | ||
| 278 | : [m] "r" (m) | ||
| 279 | ); | ||
| 280 | #else | ||
| 281 | while( a >= m) a -= m; | ||
| 282 | #endif | ||
| 283 | return a; | ||
| 284 | } | ||
| 285 | |||
| 286 | /// Add two numbers, and calculate the modulo | ||
| 287 | /// of the sum and a third number, M. | ||
| 288 | /// In other words, it returns (A+B) % M. | ||
| 289 | /// It is designed as a compact mechanism for | ||
| 290 | /// incrementing a 'mode' switch and wrapping | ||
| 291 | /// around back to 'mode 0' when the switch | ||
| 292 | /// goes past the end of the available range. | ||
| 293 | /// e.g. if you have seven modes, this switches | ||
| 294 | /// to the next one and wraps around if needed: | ||
| 295 | /// mode = addmod8( mode, 1, 7); | ||
| 296 | /// See 'mod8' for notes on performance. | ||
| 297 | LIB8STATIC uint8_t addmod8( uint8_t a, uint8_t b, uint8_t m) | ||
| 298 | { | ||
| 299 | #if defined(__AVR__) | ||
| 300 | asm volatile ( | ||
| 301 | " add %[a],%[b] \n\t" | ||
| 302 | "L_%=: sub %[a],%[m] \n\t" | ||
| 303 | " brcc L_%= \n\t" | ||
| 304 | " add %[a],%[m] \n\t" | ||
| 305 | : [a] "+r" (a) | ||
| 306 | : [b] "r" (b), [m] "r" (m) | ||
| 307 | ); | ||
| 308 | #else | ||
| 309 | a += b; | ||
| 310 | while( a >= m) a -= m; | ||
| 311 | #endif | ||
| 312 | return a; | ||
| 313 | } | ||
| 314 | |||
| 315 | /// Subtract two numbers, and calculate the modulo | ||
| 316 | /// of the difference and a third number, M. | ||
| 317 | /// In other words, it returns (A-B) % M. | ||
| 318 | /// It is designed as a compact mechanism for | ||
| 319 | /// incrementing a 'mode' switch and wrapping | ||
| 320 | /// around back to 'mode 0' when the switch | ||
| 321 | /// goes past the end of the available range. | ||
| 322 | /// e.g. if you have seven modes, this switches | ||
| 323 | /// to the next one and wraps around if needed: | ||
| 324 | /// mode = addmod8( mode, 1, 7); | ||
| 325 | /// See 'mod8' for notes on performance. | ||
| 326 | LIB8STATIC uint8_t submod8( uint8_t a, uint8_t b, uint8_t m) | ||
| 327 | { | ||
| 328 | #if defined(__AVR__) | ||
| 329 | asm volatile ( | ||
| 330 | " sub %[a],%[b] \n\t" | ||
| 331 | "L_%=: sub %[a],%[m] \n\t" | ||
| 332 | " brcc L_%= \n\t" | ||
| 333 | " add %[a],%[m] \n\t" | ||
| 334 | : [a] "+r" (a) | ||
| 335 | : [b] "r" (b), [m] "r" (m) | ||
| 336 | ); | ||
| 337 | #else | ||
| 338 | a -= b; | ||
| 339 | while( a >= m) a -= m; | ||
| 340 | #endif | ||
| 341 | return a; | ||
| 342 | } | ||
| 343 | |||
| 344 | /// 8x8 bit multiplication, with 8 bit result | ||
| 345 | LIB8STATIC_ALWAYS_INLINE uint8_t mul8( uint8_t i, uint8_t j) | ||
| 346 | { | ||
| 347 | #if MUL8_C == 1 | ||
| 348 | return ((uint16_t)i * (uint16_t)(j) ) & 0xFF; | ||
| 349 | #elif MUL8_AVRASM == 1 | ||
| 350 | asm volatile( | ||
| 351 | /* Multiply 8-bit i * 8-bit j, giving 16-bit r1,r0 */ | ||
| 352 | "mul %0, %1 \n\t" | ||
| 353 | /* Extract the LOW 8-bits (r0) */ | ||
| 354 | "mov %0, r0 \n\t" | ||
| 355 | /* Restore r1 to "0"; it's expected to always be that */ | ||
| 356 | "clr __zero_reg__ \n\t" | ||
| 357 | : "+a" (i) | ||
| 358 | : "a" (j) | ||
| 359 | : "r0", "r1"); | ||
| 360 | |||
| 361 | return i; | ||
| 362 | #else | ||
| 363 | #error "No implementation for mul8 available." | ||
| 364 | #endif | ||
| 365 | } | ||
| 366 | |||
| 367 | |||
| 368 | /// saturating 8x8 bit multiplication, with 8 bit result | ||
| 369 | /// @returns the product of i * j, capping at 0xFF | ||
| 370 | LIB8STATIC_ALWAYS_INLINE uint8_t qmul8( uint8_t i, uint8_t j) | ||
| 371 | { | ||
| 372 | #if QMUL8_C == 1 | ||
| 373 | int p = ((uint16_t)i * (uint16_t)(j) ); | ||
| 374 | if( p > 255) p = 255; | ||
| 375 | return p; | ||
| 376 | #elif QMUL8_AVRASM == 1 | ||
| 377 | asm volatile( | ||
| 378 | /* Multiply 8-bit i * 8-bit j, giving 16-bit r1,r0 */ | ||
| 379 | " mul %0, %1 \n\t" | ||
| 380 | /* If high byte of result is zero, all is well. */ | ||
| 381 | " tst r1 \n\t" | ||
| 382 | " breq Lnospill_%= \n\t" | ||
| 383 | /* If high byte of result > 0, saturate low byte to 0xFF */ | ||
| 384 | " ldi %0,0xFF \n\t" | ||
| 385 | " rjmp Ldone_%= \n\t" | ||
| 386 | "Lnospill_%=: \n\t" | ||
| 387 | /* Extract the LOW 8-bits (r0) */ | ||
| 388 | " mov %0, r0 \n\t" | ||
| 389 | "Ldone_%=: \n\t" | ||
| 390 | /* Restore r1 to "0"; it's expected to always be that */ | ||
| 391 | " clr __zero_reg__ \n\t" | ||
| 392 | : "+a" (i) | ||
| 393 | : "a" (j) | ||
| 394 | : "r0", "r1"); | ||
| 395 | |||
| 396 | return i; | ||
| 397 | #else | ||
| 398 | #error "No implementation for qmul8 available." | ||
| 399 | #endif | ||
| 400 | } | ||
| 401 | |||
| 402 | |||
| 403 | /// take abs() of a signed 8-bit int8_t | ||
| 404 | LIB8STATIC_ALWAYS_INLINE int8_t abs8( int8_t i) | ||
| 405 | { | ||
| 406 | #if ABS8_C == 1 | ||
| 407 | if( i < 0) i = -i; | ||
| 408 | return i; | ||
| 409 | #elif ABS8_AVRASM == 1 | ||
| 410 | |||
| 411 | |||
| 412 | asm volatile( | ||
| 413 | /* First, check the high bit, and prepare to skip if it's clear */ | ||
| 414 | "sbrc %0, 7 \n" | ||
| 415 | |||
| 416 | /* Negate the value */ | ||
| 417 | "neg %0 \n" | ||
| 418 | |||
| 419 | : "+r" (i) : "r" (i) ); | ||
| 420 | return i; | ||
| 421 | #else | ||
| 422 | #error "No implementation for abs8 available." | ||
| 423 | #endif | ||
| 424 | } | ||
| 425 | |||
| 426 | /// square root for 16-bit integers | ||
| 427 | /// About three times faster and five times smaller | ||
| 428 | /// than Arduino's general sqrt on AVR. | ||
| 429 | LIB8STATIC uint8_t sqrt16(uint16_t x) | ||
| 430 | { | ||
| 431 | if( x <= 1) { | ||
| 432 | return x; | ||
| 433 | } | ||
| 434 | |||
| 435 | uint8_t low = 1; // lower bound | ||
| 436 | uint8_t hi, mid; | ||
| 437 | |||
| 438 | if( x > 7904) { | ||
| 439 | hi = 255; | ||
| 440 | } else { | ||
| 441 | hi = (x >> 5) + 8; // initial estimate for upper bound | ||
| 442 | } | ||
| 443 | |||
| 444 | do { | ||
| 445 | mid = (low + hi) >> 1; | ||
| 446 | if ((uint16_t)(mid * mid) > x) { | ||
| 447 | hi = mid - 1; | ||
| 448 | } else { | ||
| 449 | if( mid == 255) { | ||
| 450 | return 255; | ||
| 451 | } | ||
| 452 | low = mid + 1; | ||
| 453 | } | ||
| 454 | } while (hi >= low); | ||
| 455 | |||
| 456 | return low - 1; | ||
| 457 | } | ||
| 458 | |||
| 459 | /// blend a variable proproportion(0-255) of one byte to another | ||
| 460 | /// @param a - the starting byte value | ||
| 461 | /// @param b - the byte value to blend toward | ||
| 462 | /// @param amountOfB - the proportion (0-255) of b to blend | ||
| 463 | /// @returns a byte value between a and b, inclusive | ||
| 464 | #if (FASTLED_BLEND_FIXED == 1) | ||
| 465 | LIB8STATIC uint8_t blend8( uint8_t a, uint8_t b, uint8_t amountOfB) | ||
| 466 | { | ||
| 467 | #if BLEND8_C == 1 | ||
| 468 | uint16_t partial; | ||
| 469 | uint8_t result; | ||
| 470 | |||
| 471 | uint8_t amountOfA = 255 - amountOfB; | ||
| 472 | |||
| 473 | partial = (a * amountOfA); | ||
| 474 | #if (FASTLED_SCALE8_FIXED == 1) | ||
| 475 | partial += a; | ||
| 476 | //partial = add8to16( a, partial); | ||
| 477 | #endif | ||
| 478 | |||
| 479 | partial += (b * amountOfB); | ||
| 480 | #if (FASTLED_SCALE8_FIXED == 1) | ||
| 481 | partial += b; | ||
| 482 | //partial = add8to16( b, partial); | ||
| 483 | #endif | ||
| 484 | |||
| 485 | result = partial >> 8; | ||
| 486 | |||
| 487 | return result; | ||
| 488 | |||
| 489 | #elif BLEND8_AVRASM == 1 | ||
| 490 | uint16_t partial; | ||
| 491 | uint8_t result; | ||
| 492 | |||
| 493 | asm volatile ( | ||
| 494 | /* partial = b * amountOfB */ | ||
| 495 | " mul %[b], %[amountOfB] \n\t" | ||
| 496 | " movw %A[partial], r0 \n\t" | ||
| 497 | |||
| 498 | /* amountOfB (aka amountOfA) = 255 - amountOfB */ | ||
| 499 | " com %[amountOfB] \n\t" | ||
| 500 | |||
| 501 | /* partial += a * amountOfB (aka amountOfA) */ | ||
| 502 | " mul %[a], %[amountOfB] \n\t" | ||
| 503 | |||
| 504 | " add %A[partial], r0 \n\t" | ||
| 505 | " adc %B[partial], r1 \n\t" | ||
| 506 | |||
| 507 | " clr __zero_reg__ \n\t" | ||
| 508 | |||
| 509 | #if (FASTLED_SCALE8_FIXED == 1) | ||
| 510 | /* partial += a */ | ||
| 511 | " add %A[partial], %[a] \n\t" | ||
| 512 | " adc %B[partial], __zero_reg__ \n\t" | ||
| 513 | |||
| 514 | // partial += b | ||
| 515 | " add %A[partial], %[b] \n\t" | ||
| 516 | " adc %B[partial], __zero_reg__ \n\t" | ||
| 517 | #endif | ||
| 518 | |||
| 519 | : [partial] "=r" (partial), | ||
| 520 | [amountOfB] "+a" (amountOfB) | ||
| 521 | : [a] "a" (a), | ||
| 522 | [b] "a" (b) | ||
| 523 | : "r0", "r1" | ||
| 524 | ); | ||
| 525 | |||
| 526 | result = partial >> 8; | ||
| 527 | |||
| 528 | return result; | ||
| 529 | |||
| 530 | #else | ||
| 531 | #error "No implementation for blend8 available." | ||
| 532 | #endif | ||
| 533 | } | ||
| 534 | |||
| 535 | #else | ||
| 536 | LIB8STATIC uint8_t blend8( uint8_t a, uint8_t b, uint8_t amountOfB) | ||
| 537 | { | ||
| 538 | // This version loses precision in the integer math | ||
| 539 | // and can actually return results outside of the range | ||
| 540 | // from a to b. Its use is not recommended. | ||
| 541 | uint8_t result; | ||
| 542 | uint8_t amountOfA = 255 - amountOfB; | ||
| 543 | result = scale8_LEAVING_R1_DIRTY( a, amountOfA) | ||
| 544 | + scale8_LEAVING_R1_DIRTY( b, amountOfB); | ||
| 545 | cleanup_R1(); | ||
| 546 | return result; | ||
| 547 | } | ||
| 548 | #endif | ||
| 549 | |||
| 550 | |||
| 551 | ///@} | ||
| 552 | #endif | ||
diff --git a/lib/lib8tion/random8.h b/lib/lib8tion/random8.h new file mode 100644 index 000000000..7ee67cbb3 --- /dev/null +++ b/lib/lib8tion/random8.h | |||
| @@ -0,0 +1,94 @@ | |||
| 1 | #ifndef __INC_LIB8TION_RANDOM_H | ||
| 2 | #define __INC_LIB8TION_RANDOM_H | ||
| 3 | ///@ingroup lib8tion | ||
| 4 | |||
| 5 | ///@defgroup Random Fast random number generators | ||
| 6 | /// Fast 8- and 16- bit unsigned random numbers. | ||
| 7 | /// Significantly faster than Arduino random(), but | ||
| 8 | /// also somewhat less random. You can add entropy. | ||
| 9 | ///@{ | ||
| 10 | |||
| 11 | // X(n+1) = (2053 * X(n)) + 13849 | ||
| 12 | #define FASTLED_RAND16_2053 ((uint16_t)(2053)) | ||
| 13 | #define FASTLED_RAND16_13849 ((uint16_t)(13849)) | ||
| 14 | |||
| 15 | /// random number seed | ||
| 16 | extern uint16_t rand16seed;// = RAND16_SEED; | ||
| 17 | |||
| 18 | /// Generate an 8-bit random number | ||
| 19 | LIB8STATIC uint8_t random8(void) | ||
| 20 | { | ||
| 21 | rand16seed = (rand16seed * FASTLED_RAND16_2053) + FASTLED_RAND16_13849; | ||
| 22 | // return the sum of the high and low bytes, for better | ||
| 23 | // mixing and non-sequential correlation | ||
| 24 | return (uint8_t)(((uint8_t)(rand16seed & 0xFF)) + | ||
| 25 | ((uint8_t)(rand16seed >> 8))); | ||
| 26 | } | ||
| 27 | |||
| 28 | /// Generate a 16 bit random number | ||
| 29 | LIB8STATIC uint16_t random16(void) | ||
| 30 | { | ||
| 31 | rand16seed = (rand16seed * FASTLED_RAND16_2053) + FASTLED_RAND16_13849; | ||
| 32 | return rand16seed; | ||
| 33 | } | ||
| 34 | |||
| 35 | /// Generate an 8-bit random number between 0 and lim | ||
| 36 | /// @param lim the upper bound for the result | ||
| 37 | LIB8STATIC uint8_t random8_max(uint8_t lim) | ||
| 38 | { | ||
| 39 | uint8_t r = random8(); | ||
| 40 | r = (r*lim) >> 8; | ||
| 41 | return r; | ||
| 42 | } | ||
| 43 | |||
| 44 | /// Generate an 8-bit random number in the given range | ||
| 45 | /// @param min the lower bound for the random number | ||
| 46 | /// @param lim the upper bound for the random number | ||
| 47 | LIB8STATIC uint8_t random8_min_max(uint8_t min, uint8_t lim) | ||
| 48 | { | ||
| 49 | uint8_t delta = lim - min; | ||
| 50 | uint8_t r = random8_max(delta) + min; | ||
| 51 | return r; | ||
| 52 | } | ||
| 53 | |||
| 54 | /// Generate an 16-bit random number between 0 and lim | ||
| 55 | /// @param lim the upper bound for the result | ||
| 56 | LIB8STATIC uint16_t random16_max(uint16_t lim) | ||
| 57 | { | ||
| 58 | uint16_t r = random16(); | ||
| 59 | uint32_t p = (uint32_t)lim * (uint32_t)r; | ||
| 60 | r = p >> 16; | ||
| 61 | return r; | ||
| 62 | } | ||
| 63 | |||
| 64 | /// Generate an 16-bit random number in the given range | ||
| 65 | /// @param min the lower bound for the random number | ||
| 66 | /// @param lim the upper bound for the random number | ||
| 67 | LIB8STATIC uint16_t random16_min_max( uint16_t min, uint16_t lim) | ||
| 68 | { | ||
| 69 | uint16_t delta = lim - min; | ||
| 70 | uint16_t r = random16_max(delta) + min; | ||
| 71 | return r; | ||
| 72 | } | ||
| 73 | |||
| 74 | /// Set the 16-bit seed used for the random number generator | ||
| 75 | LIB8STATIC void random16_set_seed(uint16_t seed) | ||
| 76 | { | ||
| 77 | rand16seed = seed; | ||
| 78 | } | ||
| 79 | |||
| 80 | /// Get the current seed value for the random number generator | ||
| 81 | LIB8STATIC uint16_t random16_get_seed(void) | ||
| 82 | { | ||
| 83 | return rand16seed; | ||
| 84 | } | ||
| 85 | |||
| 86 | /// Add entropy into the random number generator | ||
| 87 | LIB8STATIC void random16_add_entropy(uint16_t entropy) | ||
| 88 | { | ||
| 89 | rand16seed += entropy; | ||
| 90 | } | ||
| 91 | |||
| 92 | ///@} | ||
| 93 | |||
| 94 | #endif | ||
diff --git a/lib/lib8tion/scale8.h b/lib/lib8tion/scale8.h new file mode 100644 index 000000000..9895fd4d7 --- /dev/null +++ b/lib/lib8tion/scale8.h | |||
| @@ -0,0 +1,542 @@ | |||
| 1 | #ifndef __INC_LIB8TION_SCALE_H | ||
| 2 | #define __INC_LIB8TION_SCALE_H | ||
| 3 | |||
| 4 | ///@ingroup lib8tion | ||
| 5 | |||
| 6 | ///@defgroup Scaling Scaling functions | ||
| 7 | /// Fast, efficient 8-bit scaling functions specifically | ||
| 8 | /// designed for high-performance LED programming. | ||
| 9 | /// | ||
| 10 | /// Because of the AVR(Arduino) and ARM assembly language | ||
| 11 | /// implementations provided, using these functions often | ||
| 12 | /// results in smaller and faster code than the equivalent | ||
| 13 | /// program using plain "C" arithmetic and logic. | ||
| 14 | ///@{ | ||
| 15 | |||
| 16 | /// scale one byte by a second one, which is treated as | ||
| 17 | /// the numerator of a fraction whose denominator is 256 | ||
| 18 | /// In other words, it computes i * (scale / 256) | ||
| 19 | /// 4 clocks AVR with MUL, 2 clocks ARM | ||
| 20 | LIB8STATIC_ALWAYS_INLINE uint8_t scale8( uint8_t i, fract8 scale) | ||
| 21 | { | ||
| 22 | #if SCALE8_C == 1 | ||
| 23 | #if (FASTLED_SCALE8_FIXED == 1) | ||
| 24 | return (((uint16_t)i) * (1+(uint16_t)(scale))) >> 8; | ||
| 25 | #else | ||
| 26 | return ((uint16_t)i * (uint16_t)(scale) ) >> 8; | ||
| 27 | #endif | ||
| 28 | #elif SCALE8_AVRASM == 1 | ||
| 29 | #if defined(LIB8_ATTINY) | ||
| 30 | #if (FASTLED_SCALE8_FIXED == 1) | ||
| 31 | uint8_t work=i; | ||
| 32 | #else | ||
| 33 | uint8_t work=0; | ||
| 34 | #endif | ||
| 35 | uint8_t cnt=0x80; | ||
| 36 | asm volatile( | ||
| 37 | #if (FASTLED_SCALE8_FIXED == 1) | ||
| 38 | " inc %[scale] \n\t" | ||
| 39 | " breq DONE_%= \n\t" | ||
| 40 | " clr %[work] \n\t" | ||
| 41 | #endif | ||
| 42 | "LOOP_%=: \n\t" | ||
| 43 | /*" sbrc %[scale], 0 \n\t" | ||
| 44 | " add %[work], %[i] \n\t" | ||
| 45 | " ror %[work] \n\t" | ||
| 46 | " lsr %[scale] \n\t" | ||
| 47 | " clc \n\t"*/ | ||
| 48 | " sbrc %[scale], 0 \n\t" | ||
| 49 | " add %[work], %[i] \n\t" | ||
| 50 | " ror %[work] \n\t" | ||
| 51 | " lsr %[scale] \n\t" | ||
| 52 | " lsr %[cnt] \n\t" | ||
| 53 | "brcc LOOP_%= \n\t" | ||
| 54 | "DONE_%=: \n\t" | ||
| 55 | : [work] "+r" (work), [cnt] "+r" (cnt) | ||
| 56 | : [scale] "r" (scale), [i] "r" (i) | ||
| 57 | : | ||
| 58 | ); | ||
| 59 | return work; | ||
| 60 | #else | ||
| 61 | asm volatile( | ||
| 62 | #if (FASTLED_SCALE8_FIXED==1) | ||
| 63 | // Multiply 8-bit i * 8-bit scale, giving 16-bit r1,r0 | ||
| 64 | "mul %0, %1 \n\t" | ||
| 65 | // Add i to r0, possibly setting the carry flag | ||
| 66 | "add r0, %0 \n\t" | ||
| 67 | // load the immediate 0 into i (note, this does _not_ touch any flags) | ||
| 68 | "ldi %0, 0x00 \n\t" | ||
| 69 | // walk and chew gum at the same time | ||
| 70 | "adc %0, r1 \n\t" | ||
| 71 | #else | ||
| 72 | /* Multiply 8-bit i * 8-bit scale, giving 16-bit r1,r0 */ | ||
| 73 | "mul %0, %1 \n\t" | ||
| 74 | /* Move the high 8-bits of the product (r1) back to i */ | ||
| 75 | "mov %0, r1 \n\t" | ||
| 76 | /* Restore r1 to "0"; it's expected to always be that */ | ||
| 77 | #endif | ||
| 78 | "clr __zero_reg__ \n\t" | ||
| 79 | |||
| 80 | : "+a" (i) /* writes to i */ | ||
| 81 | : "a" (scale) /* uses scale */ | ||
| 82 | : "r0", "r1" /* clobbers r0, r1 */ ); | ||
| 83 | |||
| 84 | /* Return the result */ | ||
| 85 | return i; | ||
| 86 | #endif | ||
| 87 | #else | ||
| 88 | #error "No implementation for scale8 available." | ||
| 89 | #endif | ||
| 90 | } | ||
| 91 | |||
| 92 | |||
| 93 | /// The "video" version of scale8 guarantees that the output will | ||
| 94 | /// be only be zero if one or both of the inputs are zero. If both | ||
| 95 | /// inputs are non-zero, the output is guaranteed to be non-zero. | ||
| 96 | /// This makes for better 'video'/LED dimming, at the cost of | ||
| 97 | /// several additional cycles. | ||
| 98 | LIB8STATIC_ALWAYS_INLINE uint8_t scale8_video( uint8_t i, fract8 scale) | ||
| 99 | { | ||
| 100 | #if SCALE8_C == 1 || defined(LIB8_ATTINY) | ||
| 101 | uint8_t j = (((int)i * (int)scale) >> 8) + ((i&&scale)?1:0); | ||
| 102 | // uint8_t nonzeroscale = (scale != 0) ? 1 : 0; | ||
| 103 | // uint8_t j = (i == 0) ? 0 : (((int)i * (int)(scale) ) >> 8) + nonzeroscale; | ||
| 104 | return j; | ||
| 105 | #elif SCALE8_AVRASM == 1 | ||
| 106 | uint8_t j=0; | ||
| 107 | asm volatile( | ||
| 108 | " tst %[i]\n\t" | ||
| 109 | " breq L_%=\n\t" | ||
| 110 | " mul %[i], %[scale]\n\t" | ||
| 111 | " mov %[j], r1\n\t" | ||
| 112 | " clr __zero_reg__\n\t" | ||
| 113 | " cpse %[scale], r1\n\t" | ||
| 114 | " subi %[j], 0xFF\n\t" | ||
| 115 | "L_%=: \n\t" | ||
| 116 | : [j] "+a" (j) | ||
| 117 | : [i] "a" (i), [scale] "a" (scale) | ||
| 118 | : "r0", "r1"); | ||
| 119 | |||
| 120 | return j; | ||
| 121 | // uint8_t nonzeroscale = (scale != 0) ? 1 : 0; | ||
| 122 | // asm volatile( | ||
| 123 | // " tst %0 \n" | ||
| 124 | // " breq L_%= \n" | ||
| 125 | // " mul %0, %1 \n" | ||
| 126 | // " mov %0, r1 \n" | ||
| 127 | // " add %0, %2 \n" | ||
| 128 | // " clr __zero_reg__ \n" | ||
| 129 | // "L_%=: \n" | ||
| 130 | |||
| 131 | // : "+a" (i) | ||
| 132 | // : "a" (scale), "a" (nonzeroscale) | ||
| 133 | // : "r0", "r1"); | ||
| 134 | |||
| 135 | // // Return the result | ||
| 136 | // return i; | ||
| 137 | #else | ||
| 138 | #error "No implementation for scale8_video available." | ||
| 139 | #endif | ||
| 140 | } | ||
| 141 | |||
| 142 | |||
| 143 | /// This version of scale8 does not clean up the R1 register on AVR | ||
| 144 | /// If you are doing several 'scale8's in a row, use this, and | ||
| 145 | /// then explicitly call cleanup_R1. | ||
| 146 | LIB8STATIC_ALWAYS_INLINE uint8_t scale8_LEAVING_R1_DIRTY( uint8_t i, fract8 scale) | ||
| 147 | { | ||
| 148 | #if SCALE8_C == 1 | ||
| 149 | #if (FASTLED_SCALE8_FIXED == 1) | ||
| 150 | return (((uint16_t)i) * ((uint16_t)(scale)+1)) >> 8; | ||
| 151 | #else | ||
| 152 | return ((int)i * (int)(scale) ) >> 8; | ||
| 153 | #endif | ||
| 154 | #elif SCALE8_AVRASM == 1 | ||
| 155 | asm volatile( | ||
| 156 | #if (FASTLED_SCALE8_FIXED==1) | ||
| 157 | // Multiply 8-bit i * 8-bit scale, giving 16-bit r1,r0 | ||
| 158 | "mul %0, %1 \n\t" | ||
| 159 | // Add i to r0, possibly setting the carry flag | ||
| 160 | "add r0, %0 \n\t" | ||
| 161 | // load the immediate 0 into i (note, this does _not_ touch any flags) | ||
| 162 | "ldi %0, 0x00 \n\t" | ||
| 163 | // walk and chew gum at the same time | ||
| 164 | "adc %0, r1 \n\t" | ||
| 165 | #else | ||
| 166 | /* Multiply 8-bit i * 8-bit scale, giving 16-bit r1,r0 */ | ||
| 167 | "mul %0, %1 \n\t" | ||
| 168 | /* Move the high 8-bits of the product (r1) back to i */ | ||
| 169 | "mov %0, r1 \n\t" | ||
| 170 | #endif | ||
| 171 | /* R1 IS LEFT DIRTY HERE; YOU MUST ZERO IT OUT YOURSELF */ | ||
| 172 | /* "clr __zero_reg__ \n\t" */ | ||
| 173 | |||
| 174 | : "+a" (i) /* writes to i */ | ||
| 175 | : "a" (scale) /* uses scale */ | ||
| 176 | : "r0", "r1" /* clobbers r0, r1 */ ); | ||
| 177 | |||
| 178 | // Return the result | ||
| 179 | return i; | ||
| 180 | #else | ||
| 181 | #error "No implementation for scale8_LEAVING_R1_DIRTY available." | ||
| 182 | #endif | ||
| 183 | } | ||
| 184 | |||
| 185 | |||
| 186 | /// This version of scale8_video does not clean up the R1 register on AVR | ||
| 187 | /// If you are doing several 'scale8_video's in a row, use this, and | ||
| 188 | /// then explicitly call cleanup_R1. | ||
| 189 | LIB8STATIC_ALWAYS_INLINE uint8_t scale8_video_LEAVING_R1_DIRTY( uint8_t i, fract8 scale) | ||
| 190 | { | ||
| 191 | #if SCALE8_C == 1 || defined(LIB8_ATTINY) | ||
| 192 | uint8_t j = (((int)i * (int)scale) >> 8) + ((i&&scale)?1:0); | ||
| 193 | // uint8_t nonzeroscale = (scale != 0) ? 1 : 0; | ||
| 194 | // uint8_t j = (i == 0) ? 0 : (((int)i * (int)(scale) ) >> 8) + nonzeroscale; | ||
| 195 | return j; | ||
| 196 | #elif SCALE8_AVRASM == 1 | ||
| 197 | uint8_t j=0; | ||
| 198 | asm volatile( | ||
| 199 | " tst %[i]\n\t" | ||
| 200 | " breq L_%=\n\t" | ||
| 201 | " mul %[i], %[scale]\n\t" | ||
| 202 | " mov %[j], r1\n\t" | ||
| 203 | " breq L_%=\n\t" | ||
| 204 | " subi %[j], 0xFF\n\t" | ||
| 205 | "L_%=: \n\t" | ||
| 206 | : [j] "+a" (j) | ||
| 207 | : [i] "a" (i), [scale] "a" (scale) | ||
| 208 | : "r0", "r1"); | ||
| 209 | |||
| 210 | return j; | ||
| 211 | // uint8_t nonzeroscale = (scale != 0) ? 1 : 0; | ||
| 212 | // asm volatile( | ||
| 213 | // " tst %0 \n" | ||
| 214 | // " breq L_%= \n" | ||
| 215 | // " mul %0, %1 \n" | ||
| 216 | // " mov %0, r1 \n" | ||
| 217 | // " add %0, %2 \n" | ||
| 218 | // " clr __zero_reg__ \n" | ||
| 219 | // "L_%=: \n" | ||
| 220 | |||
| 221 | // : "+a" (i) | ||
| 222 | // : "a" (scale), "a" (nonzeroscale) | ||
| 223 | // : "r0", "r1"); | ||
| 224 | |||
| 225 | // // Return the result | ||
| 226 | // return i; | ||
| 227 | #else | ||
| 228 | #error "No implementation for scale8_video_LEAVING_R1_DIRTY available." | ||
| 229 | #endif | ||
| 230 | } | ||
| 231 | |||
| 232 | /// Clean up the r1 register after a series of *LEAVING_R1_DIRTY calls | ||
| 233 | LIB8STATIC_ALWAYS_INLINE void cleanup_R1(void) | ||
| 234 | { | ||
| 235 | #if CLEANUP_R1_AVRASM == 1 | ||
| 236 | // Restore r1 to "0"; it's expected to always be that | ||
| 237 | asm volatile( "clr __zero_reg__ \n\t" : : : "r1" ); | ||
| 238 | #endif | ||
| 239 | } | ||
| 240 | |||
| 241 | |||
| 242 | /// scale a 16-bit unsigned value by an 8-bit value, | ||
| 243 | /// considered as numerator of a fraction whose denominator | ||
| 244 | /// is 256. In other words, it computes i * (scale / 256) | ||
| 245 | |||
| 246 | LIB8STATIC_ALWAYS_INLINE uint16_t scale16by8( uint16_t i, fract8 scale ) | ||
| 247 | { | ||
| 248 | #if SCALE16BY8_C == 1 | ||
| 249 | uint16_t result; | ||
| 250 | #if FASTLED_SCALE8_FIXED == 1 | ||
| 251 | result = (i * (1+((uint16_t)scale))) >> 8; | ||
| 252 | #else | ||
| 253 | result = (i * scale) / 256; | ||
| 254 | #endif | ||
| 255 | return result; | ||
| 256 | #elif SCALE16BY8_AVRASM == 1 | ||
| 257 | #if FASTLED_SCALE8_FIXED == 1 | ||
| 258 | uint16_t result = 0; | ||
| 259 | asm volatile( | ||
| 260 | // result.A = HighByte( (i.A x scale) + i.A ) | ||
| 261 | " mul %A[i], %[scale] \n\t" | ||
| 262 | " add r0, %A[i] \n\t" | ||
| 263 | // " adc r1, [zero] \n\t" | ||
| 264 | // " mov %A[result], r1 \n\t" | ||
| 265 | " adc %A[result], r1 \n\t" | ||
| 266 | |||
| 267 | // result.A-B += i.B x scale | ||
| 268 | " mul %B[i], %[scale] \n\t" | ||
| 269 | " add %A[result], r0 \n\t" | ||
| 270 | " adc %B[result], r1 \n\t" | ||
| 271 | |||
| 272 | // cleanup r1 | ||
| 273 | " clr __zero_reg__ \n\t" | ||
| 274 | |||
| 275 | // result.A-B += i.B | ||
| 276 | " add %A[result], %B[i] \n\t" | ||
| 277 | " adc %B[result], __zero_reg__ \n\t" | ||
| 278 | |||
| 279 | : [result] "+r" (result) | ||
| 280 | : [i] "r" (i), [scale] "r" (scale) | ||
| 281 | : "r0", "r1" | ||
| 282 | ); | ||
| 283 | return result; | ||
| 284 | #else | ||
| 285 | uint16_t result = 0; | ||
| 286 | asm volatile( | ||
| 287 | // result.A = HighByte(i.A x j ) | ||
| 288 | " mul %A[i], %[scale] \n\t" | ||
| 289 | " mov %A[result], r1 \n\t" | ||
| 290 | //" clr %B[result] \n\t" | ||
| 291 | |||
| 292 | // result.A-B += i.B x j | ||
| 293 | " mul %B[i], %[scale] \n\t" | ||
| 294 | " add %A[result], r0 \n\t" | ||
| 295 | " adc %B[result], r1 \n\t" | ||
| 296 | |||
| 297 | // cleanup r1 | ||
| 298 | " clr __zero_reg__ \n\t" | ||
| 299 | |||
| 300 | : [result] "+r" (result) | ||
| 301 | : [i] "r" (i), [scale] "r" (scale) | ||
| 302 | : "r0", "r1" | ||
| 303 | ); | ||
| 304 | return result; | ||
| 305 | #endif | ||
| 306 | #else | ||
| 307 | #error "No implementation for scale16by8 available." | ||
| 308 | #endif | ||
| 309 | } | ||
| 310 | |||
| 311 | /// scale a 16-bit unsigned value by a 16-bit value, | ||
| 312 | /// considered as numerator of a fraction whose denominator | ||
| 313 | /// is 65536. In other words, it computes i * (scale / 65536) | ||
| 314 | |||
| 315 | LIB8STATIC uint16_t scale16( uint16_t i, fract16 scale ) | ||
| 316 | { | ||
| 317 | #if SCALE16_C == 1 | ||
| 318 | uint16_t result; | ||
| 319 | #if FASTLED_SCALE8_FIXED == 1 | ||
| 320 | result = ((uint32_t)(i) * (1+(uint32_t)(scale))) / 65536; | ||
| 321 | #else | ||
| 322 | result = ((uint32_t)(i) * (uint32_t)(scale)) / 65536; | ||
| 323 | #endif | ||
| 324 | return result; | ||
| 325 | #elif SCALE16_AVRASM == 1 | ||
| 326 | #if FASTLED_SCALE8_FIXED == 1 | ||
| 327 | // implemented sort of like | ||
| 328 | // result = ((i * scale) + i ) / 65536 | ||
| 329 | // | ||
| 330 | // why not like this, you may ask? | ||
| 331 | // result = (i * (scale+1)) / 65536 | ||
| 332 | // the answer is that if scale is 65535, then scale+1 | ||
| 333 | // will be zero, which is not what we want. | ||
| 334 | uint32_t result; | ||
| 335 | asm volatile( | ||
| 336 | // result.A-B = i.A x scale.A | ||
| 337 | " mul %A[i], %A[scale] \n\t" | ||
| 338 | // save results... | ||
| 339 | // basic idea: | ||
| 340 | //" mov %A[result], r0 \n\t" | ||
| 341 | //" mov %B[result], r1 \n\t" | ||
| 342 | // which can be written as... | ||
| 343 | " movw %A[result], r0 \n\t" | ||
| 344 | // Because we're going to add i.A-B to | ||
| 345 | // result.A-D, we DO need to keep both | ||
| 346 | // the r0 and r1 portions of the product | ||
| 347 | // UNlike in the 'unfixed scale8' version. | ||
| 348 | // So the movw here is needed. | ||
| 349 | : [result] "=r" (result) | ||
| 350 | : [i] "r" (i), | ||
| 351 | [scale] "r" (scale) | ||
| 352 | : "r0", "r1" | ||
| 353 | ); | ||
| 354 | |||
| 355 | asm volatile( | ||
| 356 | // result.C-D = i.B x scale.B | ||
| 357 | " mul %B[i], %B[scale] \n\t" | ||
| 358 | //" mov %C[result], r0 \n\t" | ||
| 359 | //" mov %D[result], r1 \n\t" | ||
| 360 | " movw %C[result], r0 \n\t" | ||
| 361 | : [result] "+r" (result) | ||
| 362 | : [i] "r" (i), | ||
| 363 | [scale] "r" (scale) | ||
| 364 | : "r0", "r1" | ||
| 365 | ); | ||
| 366 | |||
| 367 | const uint8_t zero = 0; | ||
| 368 | asm volatile( | ||
| 369 | // result.B-D += i.B x scale.A | ||
| 370 | " mul %B[i], %A[scale] \n\t" | ||
| 371 | |||
| 372 | " add %B[result], r0 \n\t" | ||
| 373 | " adc %C[result], r1 \n\t" | ||
| 374 | " adc %D[result], %[zero] \n\t" | ||
| 375 | |||
| 376 | // result.B-D += i.A x scale.B | ||
| 377 | " mul %A[i], %B[scale] \n\t" | ||
| 378 | |||
| 379 | " add %B[result], r0 \n\t" | ||
| 380 | " adc %C[result], r1 \n\t" | ||
| 381 | " adc %D[result], %[zero] \n\t" | ||
| 382 | |||
| 383 | // cleanup r1 | ||
| 384 | " clr r1 \n\t" | ||
| 385 | |||
| 386 | : [result] "+r" (result) | ||
| 387 | : [i] "r" (i), | ||
| 388 | [scale] "r" (scale), | ||
| 389 | [zero] "r" (zero) | ||
| 390 | : "r0", "r1" | ||
| 391 | ); | ||
| 392 | |||
| 393 | asm volatile( | ||
| 394 | // result.A-D += i.A-B | ||
| 395 | " add %A[result], %A[i] \n\t" | ||
| 396 | " adc %B[result], %B[i] \n\t" | ||
| 397 | " adc %C[result], %[zero] \n\t" | ||
| 398 | " adc %D[result], %[zero] \n\t" | ||
| 399 | : [result] "+r" (result) | ||
| 400 | : [i] "r" (i), | ||
| 401 | [zero] "r" (zero) | ||
| 402 | ); | ||
| 403 | |||
| 404 | result = result >> 16; | ||
| 405 | return result; | ||
| 406 | #else | ||
| 407 | uint32_t result; | ||
| 408 | asm volatile( | ||
| 409 | // result.A-B = i.A x scale.A | ||
| 410 | " mul %A[i], %A[scale] \n\t" | ||
| 411 | // save results... | ||
| 412 | // basic idea: | ||
| 413 | //" mov %A[result], r0 \n\t" | ||
| 414 | //" mov %B[result], r1 \n\t" | ||
| 415 | // which can be written as... | ||
| 416 | " movw %A[result], r0 \n\t" | ||
| 417 | // We actually don't need to do anything with r0, | ||
| 418 | // as result.A is never used again here, so we | ||
| 419 | // could just move the high byte, but movw is | ||
| 420 | // one clock cycle, just like mov, so might as | ||
| 421 | // well, in case we want to use this code for | ||
| 422 | // a generic 16x16 multiply somewhere. | ||
| 423 | |||
| 424 | : [result] "=r" (result) | ||
| 425 | : [i] "r" (i), | ||
| 426 | [scale] "r" (scale) | ||
| 427 | : "r0", "r1" | ||
| 428 | ); | ||
| 429 | |||
| 430 | asm volatile( | ||
| 431 | // result.C-D = i.B x scale.B | ||
| 432 | " mul %B[i], %B[scale] \n\t" | ||
| 433 | //" mov %C[result], r0 \n\t" | ||
| 434 | //" mov %D[result], r1 \n\t" | ||
| 435 | " movw %C[result], r0 \n\t" | ||
| 436 | : [result] "+r" (result) | ||
| 437 | : [i] "r" (i), | ||
| 438 | [scale] "r" (scale) | ||
| 439 | : "r0", "r1" | ||
| 440 | ); | ||
| 441 | |||
| 442 | const uint8_t zero = 0; | ||
| 443 | asm volatile( | ||
| 444 | // result.B-D += i.B x scale.A | ||
| 445 | " mul %B[i], %A[scale] \n\t" | ||
| 446 | |||
| 447 | " add %B[result], r0 \n\t" | ||
| 448 | " adc %C[result], r1 \n\t" | ||
| 449 | " adc %D[result], %[zero] \n\t" | ||
| 450 | |||
| 451 | // result.B-D += i.A x scale.B | ||
| 452 | " mul %A[i], %B[scale] \n\t" | ||
| 453 | |||
| 454 | " add %B[result], r0 \n\t" | ||
| 455 | " adc %C[result], r1 \n\t" | ||
| 456 | " adc %D[result], %[zero] \n\t" | ||
| 457 | |||
| 458 | // cleanup r1 | ||
| 459 | " clr r1 \n\t" | ||
| 460 | |||
| 461 | : [result] "+r" (result) | ||
| 462 | : [i] "r" (i), | ||
| 463 | [scale] "r" (scale), | ||
| 464 | [zero] "r" (zero) | ||
| 465 | : "r0", "r1" | ||
| 466 | ); | ||
| 467 | |||
| 468 | result = result >> 16; | ||
| 469 | return result; | ||
| 470 | #endif | ||
| 471 | #else | ||
| 472 | #error "No implementation for scale16 available." | ||
| 473 | #endif | ||
| 474 | } | ||
| 475 | ///@} | ||
| 476 | |||
| 477 | ///@defgroup Dimming Dimming and brightening functions | ||
| 478 | /// | ||
| 479 | /// Dimming and brightening functions | ||
| 480 | /// | ||
| 481 | /// The eye does not respond in a linear way to light. | ||
| 482 | /// High speed PWM'd LEDs at 50% duty cycle appear far | ||
| 483 | /// brighter than the 'half as bright' you might expect. | ||
| 484 | /// | ||
| 485 | /// If you want your midpoint brightness level (128) to | ||
| 486 | /// appear half as bright as 'full' brightness (255), you | ||
| 487 | /// have to apply a 'dimming function'. | ||
| 488 | ///@{ | ||
| 489 | |||
| 490 | /// Adjust a scaling value for dimming | ||
| 491 | LIB8STATIC uint8_t dim8_raw( uint8_t x) | ||
| 492 | { | ||
| 493 | return scale8( x, x); | ||
| 494 | } | ||
| 495 | |||
| 496 | /// Adjust a scaling value for dimming for video (value will never go below 1) | ||
| 497 | LIB8STATIC uint8_t dim8_video( uint8_t x) | ||
| 498 | { | ||
| 499 | return scale8_video( x, x); | ||
| 500 | } | ||
| 501 | |||
| 502 | /// Linear version of the dimming function that halves for values < 128 | ||
| 503 | LIB8STATIC uint8_t dim8_lin( uint8_t x ) | ||
| 504 | { | ||
| 505 | if( x & 0x80 ) { | ||
| 506 | x = scale8( x, x); | ||
| 507 | } else { | ||
| 508 | x += 1; | ||
| 509 | x /= 2; | ||
| 510 | } | ||
| 511 | return x; | ||
| 512 | } | ||
| 513 | |||
| 514 | /// inverse of the dimming function, brighten a value | ||
| 515 | LIB8STATIC uint8_t brighten8_raw( uint8_t x) | ||
| 516 | { | ||
| 517 | uint8_t ix = 255 - x; | ||
| 518 | return 255 - scale8( ix, ix); | ||
| 519 | } | ||
| 520 | |||
| 521 | /// inverse of the dimming function, brighten a value | ||
| 522 | LIB8STATIC uint8_t brighten8_video( uint8_t x) | ||
| 523 | { | ||
| 524 | uint8_t ix = 255 - x; | ||
| 525 | return 255 - scale8_video( ix, ix); | ||
| 526 | } | ||
| 527 | |||
| 528 | /// inverse of the dimming function, brighten a value | ||
| 529 | LIB8STATIC uint8_t brighten8_lin( uint8_t x ) | ||
| 530 | { | ||
| 531 | uint8_t ix = 255 - x; | ||
| 532 | if( ix & 0x80 ) { | ||
| 533 | ix = scale8( ix, ix); | ||
| 534 | } else { | ||
| 535 | ix += 1; | ||
| 536 | ix /= 2; | ||
| 537 | } | ||
| 538 | return 255 - ix; | ||
| 539 | } | ||
| 540 | |||
| 541 | ///@} | ||
| 542 | #endif | ||
diff --git a/lib/lib8tion/trig8.h b/lib/lib8tion/trig8.h new file mode 100644 index 000000000..4907c6ff3 --- /dev/null +++ b/lib/lib8tion/trig8.h | |||
| @@ -0,0 +1,259 @@ | |||
| 1 | #ifndef __INC_LIB8TION_TRIG_H | ||
| 2 | #define __INC_LIB8TION_TRIG_H | ||
| 3 | |||
| 4 | ///@ingroup lib8tion | ||
| 5 | |||
| 6 | ///@defgroup Trig Fast trig functions | ||
| 7 | /// Fast 8 and 16-bit approximations of sin(x) and cos(x). | ||
| 8 | /// Don't use these approximations for calculating the | ||
| 9 | /// trajectory of a rocket to Mars, but they're great | ||
| 10 | /// for art projects and LED displays. | ||
| 11 | /// | ||
| 12 | /// On Arduino/AVR, the 16-bit approximation is more than | ||
| 13 | /// 10X faster than floating point sin(x) and cos(x), while | ||
| 14 | /// the 8-bit approximation is more than 20X faster. | ||
| 15 | ///@{ | ||
| 16 | |||
| 17 | #if defined(__AVR__) | ||
| 18 | #define sin16 sin16_avr | ||
| 19 | #else | ||
| 20 | #define sin16 sin16_C | ||
| 21 | #endif | ||
| 22 | |||
| 23 | /// Fast 16-bit approximation of sin(x). This approximation never varies more than | ||
| 24 | /// 0.69% from the floating point value you'd get by doing | ||
| 25 | /// | ||
| 26 | /// float s = sin(x) * 32767.0; | ||
| 27 | /// | ||
| 28 | /// @param theta input angle from 0-65535 | ||
| 29 | /// @returns sin of theta, value between -32767 to 32767. | ||
| 30 | LIB8STATIC int16_t sin16_avr( uint16_t theta ) | ||
| 31 | { | ||
| 32 | static const uint8_t data[] = | ||
| 33 | { 0, 0, 49, 0, 6393%256, 6393/256, 48, 0, | ||
| 34 | 12539%256, 12539/256, 44, 0, 18204%256, 18204/256, 38, 0, | ||
| 35 | 23170%256, 23170/256, 31, 0, 27245%256, 27245/256, 23, 0, | ||
| 36 | 30273%256, 30273/256, 14, 0, 32137%256, 32137/256, 4 /*,0*/ }; | ||
| 37 | |||
| 38 | uint16_t offset = (theta & 0x3FFF); | ||
| 39 | |||
| 40 | // AVR doesn't have a multi-bit shift instruction, | ||
| 41 | // so if we say "offset >>= 3", gcc makes a tiny loop. | ||
| 42 | // Inserting empty volatile statements between each | ||
| 43 | // bit shift forces gcc to unroll the loop. | ||
| 44 | offset >>= 1; // 0..8191 | ||
| 45 | asm volatile(""); | ||
| 46 | offset >>= 1; // 0..4095 | ||
| 47 | asm volatile(""); | ||
| 48 | offset >>= 1; // 0..2047 | ||
| 49 | |||
| 50 | if( theta & 0x4000 ) offset = 2047 - offset; | ||
| 51 | |||
| 52 | uint8_t sectionX4; | ||
| 53 | sectionX4 = offset / 256; | ||
| 54 | sectionX4 *= 4; | ||
| 55 | |||
| 56 | uint8_t m; | ||
| 57 | |||
| 58 | union { | ||
| 59 | uint16_t b; | ||
| 60 | struct { | ||
| 61 | uint8_t blo; | ||
| 62 | uint8_t bhi; | ||
| 63 | }; | ||
| 64 | } u; | ||
| 65 | |||
| 66 | //in effect u.b = blo + (256 * bhi); | ||
| 67 | u.blo = data[ sectionX4 ]; | ||
| 68 | u.bhi = data[ sectionX4 + 1]; | ||
| 69 | m = data[ sectionX4 + 2]; | ||
| 70 | |||
| 71 | uint8_t secoffset8 = (uint8_t)(offset) / 2; | ||
| 72 | |||
| 73 | uint16_t mx = m * secoffset8; | ||
| 74 | |||
| 75 | int16_t y = mx + u.b; | ||
| 76 | if( theta & 0x8000 ) y = -y; | ||
| 77 | |||
| 78 | return y; | ||
| 79 | } | ||
| 80 | |||
| 81 | /// Fast 16-bit approximation of sin(x). This approximation never varies more than | ||
| 82 | /// 0.69% from the floating point value you'd get by doing | ||
| 83 | /// | ||
| 84 | /// float s = sin(x) * 32767.0; | ||
| 85 | /// | ||
| 86 | /// @param theta input angle from 0-65535 | ||
| 87 | /// @returns sin of theta, value between -32767 to 32767. | ||
| 88 | LIB8STATIC int16_t sin16_C( uint16_t theta ) | ||
| 89 | { | ||
| 90 | static const uint16_t base[] = | ||
| 91 | { 0, 6393, 12539, 18204, 23170, 27245, 30273, 32137 }; | ||
| 92 | static const uint8_t slope[] = | ||
| 93 | { 49, 48, 44, 38, 31, 23, 14, 4 }; | ||
| 94 | |||
| 95 | uint16_t offset = (theta & 0x3FFF) >> 3; // 0..2047 | ||
| 96 | if( theta & 0x4000 ) offset = 2047 - offset; | ||
| 97 | |||
| 98 | uint8_t section = offset / 256; // 0..7 | ||
| 99 | uint16_t b = base[section]; | ||
| 100 | uint8_t m = slope[section]; | ||
| 101 | |||
| 102 | uint8_t secoffset8 = (uint8_t)(offset) / 2; | ||
| 103 | |||
| 104 | uint16_t mx = m * secoffset8; | ||
| 105 | int16_t y = mx + b; | ||
| 106 | |||
| 107 | if( theta & 0x8000 ) y = -y; | ||
| 108 | |||
| 109 | return y; | ||
| 110 | } | ||
| 111 | |||
| 112 | |||
| 113 | /// Fast 16-bit approximation of cos(x). This approximation never varies more than | ||
| 114 | /// 0.69% from the floating point value you'd get by doing | ||
| 115 | /// | ||
| 116 | /// float s = cos(x) * 32767.0; | ||
| 117 | /// | ||
| 118 | /// @param theta input angle from 0-65535 | ||
| 119 | /// @returns sin of theta, value between -32767 to 32767. | ||
| 120 | LIB8STATIC int16_t cos16( uint16_t theta) | ||
| 121 | { | ||
| 122 | return sin16( theta + 16384); | ||
| 123 | } | ||
| 124 | |||
| 125 | /////////////////////////////////////////////////////////////////////// | ||
| 126 | |||
| 127 | // sin8 & cos8 | ||
| 128 | // Fast 8-bit approximations of sin(x) & cos(x). | ||
| 129 | // Input angle is an unsigned int from 0-255. | ||
| 130 | // Output is an unsigned int from 0 to 255. | ||
| 131 | // | ||
| 132 | // This approximation can vary up to 2% | ||
| 133 | // from the floating point value you'd get by doing | ||
| 134 | // float s = (sin( x ) * 128.0) + 128; | ||
| 135 | // | ||
| 136 | // Don't use this approximation for calculating the | ||
| 137 | // "real" trigonometric calculations, but it's great | ||
| 138 | // for art projects and LED displays. | ||
| 139 | // | ||
| 140 | // On Arduino/AVR, this approximation is more than | ||
| 141 | // 20X faster than floating point sin(x) and cos(x) | ||
| 142 | |||
// Select the sin8 implementation: the inline-assembly version on AVR targets
// that are not flagged LIB8_ATTINY, the portable C version everywhere else.
#if defined(__AVR__) && !defined(LIB8_ATTINY)
#define sin8 sin8_avr
#else
#define sin8 sin8_C
#endif


// Piecewise-linear lookup table used by sin8_avr and sin8_C.
// It holds four (b, m16) pairs, one per 16-step section of a quarter wave:
// even indices are the section's starting value b, odd indices are the
// slope m16 expressed in sixteenths per step.
//
// Declared `static` because this definition lives in a header: a plain
// `const` object has external linkage in C, so every translation unit that
// includes this file would otherwise emit its own definition and the link
// would fail with duplicate symbols.
// NOTE(review): assumes LIB8STATIC functions have internal linkage
// (static / static inline) -- confirm against the LIB8STATIC definition.
static const uint8_t b_m16_interleave[] = { 0, 49, 49, 41, 90, 27, 117, 10 };
| 151 | |||
/// Fast 8-bit approximation of sin(x), AVR inline-assembly implementation.
/// Per the library's stated accuracy, the result never varies more than
/// 2% from the floating point value you'd get by doing
///
///     float s = (sin(x) * 128.0) + 128;
///
/// The quarter wave is split into 4 sections of 16 steps; each section is
/// approximated by a line whose start value (b) and slope in sixteenths (m16)
/// come from b_m16_interleave.
///
/// @param theta input angle from 0-255
/// @returns sin of theta, value between 0 and 255 (128 is the zero crossing)
LIB8STATIC uint8_t sin8_avr( uint8_t theta)
{
    uint8_t offset = theta;

    // Mirror the falling quadrants: SBRC skips the following instruction when
    // bit 6 of theta is clear, so COM (one's complement, i.e. 255 - offset)
    // only runs when bit 6 is set.
    asm volatile(
        "sbrc %[theta],6 \n\t"
        "com %[offset] \n\t"
        : [theta] "+r" (theta), [offset] "+r" (offset)
    );

    offset &= 0x3F; // 0..63

    uint8_t secoffset = offset & 0x0F; // 0..15
    // In a mirrored quadrant the position inside the section is bumped by one.
    if( theta & 0x40) secoffset++;

    uint8_t m16; uint8_t b;

    uint8_t section = offset >> 4; // 0..3
    // Each section owns two consecutive table bytes: b, then m16.
    uint8_t s2 = section * 2;

    const uint8_t* p = b_m16_interleave;
    p += s2;
    b = *p;
    p++;
    m16 = *p;

    uint8_t mx;
    uint8_t xr1;
    // Computes mx = (m16 * secoffset) >> 4, truncated to 8 bits.
    asm volatile(
        // MUL leaves the 16-bit product in r1:r0.
        "mul %[m16],%[secoffset] \n\t"
        "mov %[mx],r0 \n\t"
        "mov %[xr1],r1 \n\t"
        // Clear r1 again after MUL overwrote it.
        // NOTE(review): presumably because avr-gcc expects r1 to hold zero -- confirm.
        "eor r1, r1 \n\t"
        // Low byte: swap nibbles and keep the low nibble -> (r0 >> 4).
        "swap %[mx] \n\t"
        "andi %[mx],0x0F \n\t"
        // High byte: swap nibbles and keep the high nibble -> (r1 << 4).
        "swap %[xr1] \n\t"
        "andi %[xr1], 0xF0 \n\t"
        // Combine both halves into the shifted 8-bit result.
        "or %[mx], %[xr1] \n\t"
        : [mx] "=d" (mx), [xr1] "=d" (xr1)
        : [m16] "d" (m16), [secoffset] "d" (secoffset)
    );

    // Point on the section's line: start value plus scaled slope.
    int8_t y = mx + b;
    // Bit 7 selects the negative half of the wave.
    if( theta & 0x80 ) y = -y;

    // Re-centre the signed value around 128 for the unsigned return type.
    y += 128;

    return y;
}
| 208 | |||
| 209 | |||
| 210 | /// Fast 8-bit approximation of sin(x). This approximation never varies more than | ||
| 211 | /// 2% from the floating point value you'd get by doing | ||
| 212 | /// | ||
| 213 | /// float s = (sin(x) * 128.0) + 128; | ||
| 214 | /// | ||
| 215 | /// @param theta input angle from 0-255 | ||
| 216 | /// @returns sin of theta, value between 0 and 255 | ||
| 217 | LIB8STATIC uint8_t sin8_C( uint8_t theta) | ||
| 218 | { | ||
| 219 | uint8_t offset = theta; | ||
| 220 | if( theta & 0x40 ) { | ||
| 221 | offset = (uint8_t)255 - offset; | ||
| 222 | } | ||
| 223 | offset &= 0x3F; // 0..63 | ||
| 224 | |||
| 225 | uint8_t secoffset = offset & 0x0F; // 0..15 | ||
| 226 | if( theta & 0x40) secoffset++; | ||
| 227 | |||
| 228 | uint8_t section = offset >> 4; // 0..3 | ||
| 229 | uint8_t s2 = section * 2; | ||
| 230 | const uint8_t* p = b_m16_interleave; | ||
| 231 | p += s2; | ||
| 232 | uint8_t b = *p; | ||
| 233 | p++; | ||
| 234 | uint8_t m16 = *p; | ||
| 235 | |||
| 236 | uint8_t mx = (m16 * secoffset) >> 4; | ||
| 237 | |||
| 238 | int8_t y = mx + b; | ||
| 239 | if( theta & 0x80 ) y = -y; | ||
| 240 | |||
| 241 | y += 128; | ||
| 242 | |||
| 243 | return y; | ||
| 244 | } | ||
| 245 | |||
| 246 | /// Fast 8-bit approximation of cos(x). This approximation never varies more than | ||
| 247 | /// 2% from the floating point value you'd get by doing | ||
| 248 | /// | ||
| 249 | /// float s = (cos(x) * 128.0) + 128; | ||
| 250 | /// | ||
| 251 | /// @param theta input angle from 0-255 | ||
| 252 | /// @returns sin of theta, value between 0 and 255 | ||
| 253 | LIB8STATIC uint8_t cos8( uint8_t theta) | ||
| 254 | { | ||
| 255 | return sin8( theta + 64); | ||
| 256 | } | ||
| 257 | |||
| 258 | ///@} | ||
| 259 | #endif | ||
