aboutsummaryrefslogtreecommitdiff
path: root/lib/lib8tion/scale8.h
diff options
context:
space:
mode:
Diffstat (limited to 'lib/lib8tion/scale8.h')
-rw-r--r--lib/lib8tion/scale8.h542
1 files changed, 542 insertions, 0 deletions
diff --git a/lib/lib8tion/scale8.h b/lib/lib8tion/scale8.h
new file mode 100644
index 000000000..9895fd4d7
--- /dev/null
+++ b/lib/lib8tion/scale8.h
@@ -0,0 +1,542 @@
1#ifndef __INC_LIB8TION_SCALE_H
2#define __INC_LIB8TION_SCALE_H
3
4///@ingroup lib8tion
5
6///@defgroup Scaling Scaling functions
7/// Fast, efficient 8-bit scaling functions specifically
8/// designed for high-performance LED programming.
9///
10/// Because of the AVR(Arduino) and ARM assembly language
11/// implementations provided, using these functions often
12/// results in smaller and faster code than the equivalent
13/// program using plain "C" arithmetic and logic.
14///@{
15
16/// scale one byte by a second one, which is treated as
17/// the numerator of a fraction whose denominator is 256
18/// In other words, it computes i * (scale / 256)
19/// 4 clocks AVR with MUL, 2 clocks ARM
20LIB8STATIC_ALWAYS_INLINE uint8_t scale8( uint8_t i, fract8 scale)
21{
22#if SCALE8_C == 1
23#if (FASTLED_SCALE8_FIXED == 1)
24 return (((uint16_t)i) * (1+(uint16_t)(scale))) >> 8;
25#else
26 return ((uint16_t)i * (uint16_t)(scale) ) >> 8;
27#endif
28#elif SCALE8_AVRASM == 1
29#if defined(LIB8_ATTINY)
30#if (FASTLED_SCALE8_FIXED == 1)
31 uint8_t work=i;
32#else
33 uint8_t work=0;
34#endif
35 uint8_t cnt=0x80;
36 asm volatile(
37#if (FASTLED_SCALE8_FIXED == 1)
38 " inc %[scale] \n\t"
39 " breq DONE_%= \n\t"
40 " clr %[work] \n\t"
41#endif
42 "LOOP_%=: \n\t"
43 /*" sbrc %[scale], 0 \n\t"
44 " add %[work], %[i] \n\t"
45 " ror %[work] \n\t"
46 " lsr %[scale] \n\t"
47 " clc \n\t"*/
48 " sbrc %[scale], 0 \n\t"
49 " add %[work], %[i] \n\t"
50 " ror %[work] \n\t"
51 " lsr %[scale] \n\t"
52 " lsr %[cnt] \n\t"
53 "brcc LOOP_%= \n\t"
54 "DONE_%=: \n\t"
55 : [work] "+r" (work), [cnt] "+r" (cnt)
56 : [scale] "r" (scale), [i] "r" (i)
57 :
58 );
59 return work;
60#else
61 asm volatile(
62#if (FASTLED_SCALE8_FIXED==1)
63 // Multiply 8-bit i * 8-bit scale, giving 16-bit r1,r0
64 "mul %0, %1 \n\t"
65 // Add i to r0, possibly setting the carry flag
66 "add r0, %0 \n\t"
67 // load the immediate 0 into i (note, this does _not_ touch any flags)
68 "ldi %0, 0x00 \n\t"
69 // walk and chew gum at the same time
70 "adc %0, r1 \n\t"
71#else
72 /* Multiply 8-bit i * 8-bit scale, giving 16-bit r1,r0 */
73 "mul %0, %1 \n\t"
74 /* Move the high 8-bits of the product (r1) back to i */
75 "mov %0, r1 \n\t"
76 /* Restore r1 to "0"; it's expected to always be that */
77#endif
78 "clr __zero_reg__ \n\t"
79
80 : "+a" (i) /* writes to i */
81 : "a" (scale) /* uses scale */
82 : "r0", "r1" /* clobbers r0, r1 */ );
83
84 /* Return the result */
85 return i;
86#endif
87#else
88#error "No implementation for scale8 available."
89#endif
90}
91
92
93/// The "video" version of scale8 guarantees that the output will
94/// be only be zero if one or both of the inputs are zero. If both
95/// inputs are non-zero, the output is guaranteed to be non-zero.
96/// This makes for better 'video'/LED dimming, at the cost of
97/// several additional cycles.
98LIB8STATIC_ALWAYS_INLINE uint8_t scale8_video( uint8_t i, fract8 scale)
99{
100#if SCALE8_C == 1 || defined(LIB8_ATTINY)
101 uint8_t j = (((int)i * (int)scale) >> 8) + ((i&&scale)?1:0);
102 // uint8_t nonzeroscale = (scale != 0) ? 1 : 0;
103 // uint8_t j = (i == 0) ? 0 : (((int)i * (int)(scale) ) >> 8) + nonzeroscale;
104 return j;
105#elif SCALE8_AVRASM == 1
106 uint8_t j=0;
107 asm volatile(
108 " tst %[i]\n\t"
109 " breq L_%=\n\t"
110 " mul %[i], %[scale]\n\t"
111 " mov %[j], r1\n\t"
112 " clr __zero_reg__\n\t"
113 " cpse %[scale], r1\n\t"
114 " subi %[j], 0xFF\n\t"
115 "L_%=: \n\t"
116 : [j] "+a" (j)
117 : [i] "a" (i), [scale] "a" (scale)
118 : "r0", "r1");
119
120 return j;
121 // uint8_t nonzeroscale = (scale != 0) ? 1 : 0;
122 // asm volatile(
123 // " tst %0 \n"
124 // " breq L_%= \n"
125 // " mul %0, %1 \n"
126 // " mov %0, r1 \n"
127 // " add %0, %2 \n"
128 // " clr __zero_reg__ \n"
129 // "L_%=: \n"
130
131 // : "+a" (i)
132 // : "a" (scale), "a" (nonzeroscale)
133 // : "r0", "r1");
134
135 // // Return the result
136 // return i;
137#else
138#error "No implementation for scale8_video available."
139#endif
140}
141
142
143/// This version of scale8 does not clean up the R1 register on AVR
144/// If you are doing several 'scale8's in a row, use this, and
145/// then explicitly call cleanup_R1.
146LIB8STATIC_ALWAYS_INLINE uint8_t scale8_LEAVING_R1_DIRTY( uint8_t i, fract8 scale)
147{
148#if SCALE8_C == 1
149#if (FASTLED_SCALE8_FIXED == 1)
150 return (((uint16_t)i) * ((uint16_t)(scale)+1)) >> 8;
151#else
152 return ((int)i * (int)(scale) ) >> 8;
153#endif
154#elif SCALE8_AVRASM == 1
155 asm volatile(
156 #if (FASTLED_SCALE8_FIXED==1)
157 // Multiply 8-bit i * 8-bit scale, giving 16-bit r1,r0
158 "mul %0, %1 \n\t"
159 // Add i to r0, possibly setting the carry flag
160 "add r0, %0 \n\t"
161 // load the immediate 0 into i (note, this does _not_ touch any flags)
162 "ldi %0, 0x00 \n\t"
163 // walk and chew gum at the same time
164 "adc %0, r1 \n\t"
165 #else
166 /* Multiply 8-bit i * 8-bit scale, giving 16-bit r1,r0 */
167 "mul %0, %1 \n\t"
168 /* Move the high 8-bits of the product (r1) back to i */
169 "mov %0, r1 \n\t"
170 #endif
171 /* R1 IS LEFT DIRTY HERE; YOU MUST ZERO IT OUT YOURSELF */
172 /* "clr __zero_reg__ \n\t" */
173
174 : "+a" (i) /* writes to i */
175 : "a" (scale) /* uses scale */
176 : "r0", "r1" /* clobbers r0, r1 */ );
177
178 // Return the result
179 return i;
180#else
181#error "No implementation for scale8_LEAVING_R1_DIRTY available."
182#endif
183}
184
185
186/// This version of scale8_video does not clean up the R1 register on AVR
187/// If you are doing several 'scale8_video's in a row, use this, and
188/// then explicitly call cleanup_R1.
189LIB8STATIC_ALWAYS_INLINE uint8_t scale8_video_LEAVING_R1_DIRTY( uint8_t i, fract8 scale)
190{
191#if SCALE8_C == 1 || defined(LIB8_ATTINY)
192 uint8_t j = (((int)i * (int)scale) >> 8) + ((i&&scale)?1:0);
193 // uint8_t nonzeroscale = (scale != 0) ? 1 : 0;
194 // uint8_t j = (i == 0) ? 0 : (((int)i * (int)(scale) ) >> 8) + nonzeroscale;
195 return j;
196#elif SCALE8_AVRASM == 1
197 uint8_t j=0;
198 asm volatile(
199 " tst %[i]\n\t"
200 " breq L_%=\n\t"
201 " mul %[i], %[scale]\n\t"
202 " mov %[j], r1\n\t"
203 " breq L_%=\n\t"
204 " subi %[j], 0xFF\n\t"
205 "L_%=: \n\t"
206 : [j] "+a" (j)
207 : [i] "a" (i), [scale] "a" (scale)
208 : "r0", "r1");
209
210 return j;
211 // uint8_t nonzeroscale = (scale != 0) ? 1 : 0;
212 // asm volatile(
213 // " tst %0 \n"
214 // " breq L_%= \n"
215 // " mul %0, %1 \n"
216 // " mov %0, r1 \n"
217 // " add %0, %2 \n"
218 // " clr __zero_reg__ \n"
219 // "L_%=: \n"
220
221 // : "+a" (i)
222 // : "a" (scale), "a" (nonzeroscale)
223 // : "r0", "r1");
224
225 // // Return the result
226 // return i;
227#else
228#error "No implementation for scale8_video_LEAVING_R1_DIRTY available."
229#endif
230}
231
232/// Clean up the r1 register after a series of *LEAVING_R1_DIRTY calls
233LIB8STATIC_ALWAYS_INLINE void cleanup_R1(void)
234{
235#if CLEANUP_R1_AVRASM == 1
236 // Restore r1 to "0"; it's expected to always be that
237 asm volatile( "clr __zero_reg__ \n\t" : : : "r1" );
238#endif
239}
240
241
242/// scale a 16-bit unsigned value by an 8-bit value,
243/// considered as numerator of a fraction whose denominator
244/// is 256. In other words, it computes i * (scale / 256)
245
246LIB8STATIC_ALWAYS_INLINE uint16_t scale16by8( uint16_t i, fract8 scale )
247{
248#if SCALE16BY8_C == 1
249 uint16_t result;
250#if FASTLED_SCALE8_FIXED == 1
251 result = (i * (1+((uint16_t)scale))) >> 8;
252#else
253 result = (i * scale) / 256;
254#endif
255 return result;
256#elif SCALE16BY8_AVRASM == 1
257#if FASTLED_SCALE8_FIXED == 1
258 uint16_t result = 0;
259 asm volatile(
260 // result.A = HighByte( (i.A x scale) + i.A )
261 " mul %A[i], %[scale] \n\t"
262 " add r0, %A[i] \n\t"
263 // " adc r1, [zero] \n\t"
264 // " mov %A[result], r1 \n\t"
265 " adc %A[result], r1 \n\t"
266
267 // result.A-B += i.B x scale
268 " mul %B[i], %[scale] \n\t"
269 " add %A[result], r0 \n\t"
270 " adc %B[result], r1 \n\t"
271
272 // cleanup r1
273 " clr __zero_reg__ \n\t"
274
275 // result.A-B += i.B
276 " add %A[result], %B[i] \n\t"
277 " adc %B[result], __zero_reg__ \n\t"
278
279 : [result] "+r" (result)
280 : [i] "r" (i), [scale] "r" (scale)
281 : "r0", "r1"
282 );
283 return result;
284#else
285 uint16_t result = 0;
286 asm volatile(
287 // result.A = HighByte(i.A x j )
288 " mul %A[i], %[scale] \n\t"
289 " mov %A[result], r1 \n\t"
290 //" clr %B[result] \n\t"
291
292 // result.A-B += i.B x j
293 " mul %B[i], %[scale] \n\t"
294 " add %A[result], r0 \n\t"
295 " adc %B[result], r1 \n\t"
296
297 // cleanup r1
298 " clr __zero_reg__ \n\t"
299
300 : [result] "+r" (result)
301 : [i] "r" (i), [scale] "r" (scale)
302 : "r0", "r1"
303 );
304 return result;
305#endif
306#else
307 #error "No implementation for scale16by8 available."
308#endif
309}
310
311/// scale a 16-bit unsigned value by a 16-bit value,
312/// considered as numerator of a fraction whose denominator
313/// is 65536. In other words, it computes i * (scale / 65536)
314
315LIB8STATIC uint16_t scale16( uint16_t i, fract16 scale )
316{
317 #if SCALE16_C == 1
318 uint16_t result;
319#if FASTLED_SCALE8_FIXED == 1
320 result = ((uint32_t)(i) * (1+(uint32_t)(scale))) / 65536;
321#else
322 result = ((uint32_t)(i) * (uint32_t)(scale)) / 65536;
323#endif
324 return result;
325#elif SCALE16_AVRASM == 1
326#if FASTLED_SCALE8_FIXED == 1
327 // implemented sort of like
328 // result = ((i * scale) + i ) / 65536
329 //
330 // why not like this, you may ask?
331 // result = (i * (scale+1)) / 65536
332 // the answer is that if scale is 65535, then scale+1
333 // will be zero, which is not what we want.
334 uint32_t result;
335 asm volatile(
336 // result.A-B = i.A x scale.A
337 " mul %A[i], %A[scale] \n\t"
338 // save results...
339 // basic idea:
340 //" mov %A[result], r0 \n\t"
341 //" mov %B[result], r1 \n\t"
342 // which can be written as...
343 " movw %A[result], r0 \n\t"
344 // Because we're going to add i.A-B to
345 // result.A-D, we DO need to keep both
346 // the r0 and r1 portions of the product
347 // UNlike in the 'unfixed scale8' version.
348 // So the movw here is needed.
349 : [result] "=r" (result)
350 : [i] "r" (i),
351 [scale] "r" (scale)
352 : "r0", "r1"
353 );
354
355 asm volatile(
356 // result.C-D = i.B x scale.B
357 " mul %B[i], %B[scale] \n\t"
358 //" mov %C[result], r0 \n\t"
359 //" mov %D[result], r1 \n\t"
360 " movw %C[result], r0 \n\t"
361 : [result] "+r" (result)
362 : [i] "r" (i),
363 [scale] "r" (scale)
364 : "r0", "r1"
365 );
366
367 const uint8_t zero = 0;
368 asm volatile(
369 // result.B-D += i.B x scale.A
370 " mul %B[i], %A[scale] \n\t"
371
372 " add %B[result], r0 \n\t"
373 " adc %C[result], r1 \n\t"
374 " adc %D[result], %[zero] \n\t"
375
376 // result.B-D += i.A x scale.B
377 " mul %A[i], %B[scale] \n\t"
378
379 " add %B[result], r0 \n\t"
380 " adc %C[result], r1 \n\t"
381 " adc %D[result], %[zero] \n\t"
382
383 // cleanup r1
384 " clr r1 \n\t"
385
386 : [result] "+r" (result)
387 : [i] "r" (i),
388 [scale] "r" (scale),
389 [zero] "r" (zero)
390 : "r0", "r1"
391 );
392
393 asm volatile(
394 // result.A-D += i.A-B
395 " add %A[result], %A[i] \n\t"
396 " adc %B[result], %B[i] \n\t"
397 " adc %C[result], %[zero] \n\t"
398 " adc %D[result], %[zero] \n\t"
399 : [result] "+r" (result)
400 : [i] "r" (i),
401 [zero] "r" (zero)
402 );
403
404 result = result >> 16;
405 return result;
406#else
407 uint32_t result;
408 asm volatile(
409 // result.A-B = i.A x scale.A
410 " mul %A[i], %A[scale] \n\t"
411 // save results...
412 // basic idea:
413 //" mov %A[result], r0 \n\t"
414 //" mov %B[result], r1 \n\t"
415 // which can be written as...
416 " movw %A[result], r0 \n\t"
417 // We actually don't need to do anything with r0,
418 // as result.A is never used again here, so we
419 // could just move the high byte, but movw is
420 // one clock cycle, just like mov, so might as
421 // well, in case we want to use this code for
422 // a generic 16x16 multiply somewhere.
423
424 : [result] "=r" (result)
425 : [i] "r" (i),
426 [scale] "r" (scale)
427 : "r0", "r1"
428 );
429
430 asm volatile(
431 // result.C-D = i.B x scale.B
432 " mul %B[i], %B[scale] \n\t"
433 //" mov %C[result], r0 \n\t"
434 //" mov %D[result], r1 \n\t"
435 " movw %C[result], r0 \n\t"
436 : [result] "+r" (result)
437 : [i] "r" (i),
438 [scale] "r" (scale)
439 : "r0", "r1"
440 );
441
442 const uint8_t zero = 0;
443 asm volatile(
444 // result.B-D += i.B x scale.A
445 " mul %B[i], %A[scale] \n\t"
446
447 " add %B[result], r0 \n\t"
448 " adc %C[result], r1 \n\t"
449 " adc %D[result], %[zero] \n\t"
450
451 // result.B-D += i.A x scale.B
452 " mul %A[i], %B[scale] \n\t"
453
454 " add %B[result], r0 \n\t"
455 " adc %C[result], r1 \n\t"
456 " adc %D[result], %[zero] \n\t"
457
458 // cleanup r1
459 " clr r1 \n\t"
460
461 : [result] "+r" (result)
462 : [i] "r" (i),
463 [scale] "r" (scale),
464 [zero] "r" (zero)
465 : "r0", "r1"
466 );
467
468 result = result >> 16;
469 return result;
470#endif
471#else
472 #error "No implementation for scale16 available."
473#endif
474}
475///@}
476
///@defgroup Dimming Dimming and brightening functions
///
/// Dimming and brightening functions
///
/// The eye does not respond in a linear way to light.
/// High speed PWM'd LEDs at 50% duty cycle appear far
/// brighter than the 'half as bright' you might expect.
///
/// If you want your midpoint brightness level (128) to
/// appear half as bright as 'full' brightness (255), you
/// have to apply a 'dimming function'.
///@{
489
490/// Adjust a scaling value for dimming
491LIB8STATIC uint8_t dim8_raw( uint8_t x)
492{
493 return scale8( x, x);
494}
495
496/// Adjust a scaling value for dimming for video (value will never go below 1)
497LIB8STATIC uint8_t dim8_video( uint8_t x)
498{
499 return scale8_video( x, x);
500}
501
502/// Linear version of the dimming function that halves for values < 128
503LIB8STATIC uint8_t dim8_lin( uint8_t x )
504{
505 if( x & 0x80 ) {
506 x = scale8( x, x);
507 } else {
508 x += 1;
509 x /= 2;
510 }
511 return x;
512}
513
514/// inverse of the dimming function, brighten a value
515LIB8STATIC uint8_t brighten8_raw( uint8_t x)
516{
517 uint8_t ix = 255 - x;
518 return 255 - scale8( ix, ix);
519}
520
521/// inverse of the dimming function, brighten a value
522LIB8STATIC uint8_t brighten8_video( uint8_t x)
523{
524 uint8_t ix = 255 - x;
525 return 255 - scale8_video( ix, ix);
526}
527
528/// inverse of the dimming function, brighten a value
529LIB8STATIC uint8_t brighten8_lin( uint8_t x )
530{
531 uint8_t ix = 255 - x;
532 if( ix & 0x80 ) {
533 ix = scale8( ix, ix);
534 } else {
535 ix += 1;
536 ix /= 2;
537 }
538 return 255 - ix;
539}
540
541///@}
542#endif