1 files changed, 242 insertions, 0 deletions
diff --git a/lib/lib8tion/lib8tion.c b/lib/lib8tion/lib8tion.c
new file mode 100644
index 000000000..84b3e9c61
--- /dev/null
+++ b/lib/lib8tion/lib8tion.c
@@ -0,0 +1,242 @@
+#define FASTLED_INTERNAL
+#include <stdint.h>
+#define RAND16_SEED  1337  // compile-time initial value given to rand16seed
+uint16_t rand16seed = RAND16_SEED;  // global 16-bit seed word; NOTE(review): presumably the state behind random8() and friends - confirm in lib8tion.h
+// memset8, memcpy8, memmove8:
+//  optimized avr replacements for the standard "C" library
+//  routines memset, memcpy, and memmove.
+//
+//  There are two techniques that make these routines
+//  faster than the standard avr-libc routines.
+//  First, the loops are unrolled 2X, meaning that
+//  the average loop overhead is cut in half.
+//  And second, the compare-and-branch at the bottom
+//  of each loop decrements the low byte of the
+//  counter, and if the carry is clear, it branches
+//  back up immediately.  Only if the low byte math
+//  causes carry do we bother to decrement the high
+//  byte and check that result for carry as well.
+//  Results for a 100-byte buffer are 20-40% faster
+//  than standard avr-libc, at a cost of a few extra
+//  bytes of code.
+#if defined(__AVR__)
+/// Fill `num` bytes starting at `ptr` with the byte `val` (AVR-only assembly).
+///
+/// The store loop is unrolled 2X: an odd count enters at the second store,
+/// an even count enters at the counter check, and every further trip through
+/// the loop accounts for two bytes.
+///
+/// @param ptr  start of the region to fill
+/// @param val  byte value to store
+/// @param num  number of bytes to fill; 0 stores nothing
+/// @return     `ptr`, unchanged
+//__attribute__ ((noinline))
+void * memset8 ( void * ptr, uint8_t val, uint16_t num )
+{
+    asm volatile(
+         "  movw r26, %[ptr]        \n\t"  // X (r26:r27) = ptr
+         "  sbrs %A[num], 0         \n\t"  // odd count: skip the next rjmp
+         "  rjmp Lseteven_%=        \n\t"
+         "  rjmp Lsetodd_%=         \n\t"
+         "Lsetloop_%=:              \n\t"
+         "  st X+, %[val]           \n\t"
+         "Lsetodd_%=:               \n\t"
+         "  st X+, %[val]           \n\t"
+         "Lseteven_%=:              \n\t"
+         "  subi %A[num], 2         \n\t"  // low byte -= 2
+         "  brcc Lsetloop_%=        \n\t"  // no borrow: loop right away
+         "  sbci %B[num], 0         \n\t"  // borrow: take it from the high byte
+         "  brcc Lsetloop_%=        \n\t"
+         // subi/sbci only accept r16..r31, so the counter must live in a
+         // "d" register pair rather than any "r" register.
+         : [num] "+d" (num)
+         : [ptr]  "r" (ptr),
+           [val]  "r" (val)
+         // X is overwritten by the movw and advanced by every st X+; listing
+         // it keeps the compiler from placing val (or live data) there.
+         : "memory", "r26", "r27"
+         );
+    return ptr;
+}
+/// Copy `num` bytes from `src` to `dst`, lowest address first (AVR-only).
+///
+/// The copy loop is unrolled 2X: an odd count enters at the second
+/// load/store pair, an even count enters at the counter check.  Because
+/// bytes are copied in ascending order, overlapping regions are only safe
+/// when `dst` is below `src`.
+///
+/// @param dst  start of the destination region
+/// @param src  start of the source region
+/// @param num  number of bytes to copy; 0 copies nothing
+/// @return     `dst`, unchanged
+//__attribute__ ((noinline))
+void * memcpy8 ( void * dst, const void* src, uint16_t num )
+{
+    asm volatile(
+         "  movw r30, %[src]        \n\t"  // Z (r30:r31) = src
+         "  movw r26, %[dst]        \n\t"  // X (r26:r27) = dst
+         "  sbrs %A[num], 0         \n\t"  // odd count: skip the next rjmp
+         "  rjmp Lcpyeven_%=        \n\t"
+         "  rjmp Lcpyodd_%=         \n\t"
+         "Lcpyloop_%=:              \n\t"
+         "  ld __tmp_reg__, Z+      \n\t"
+         "  st X+, __tmp_reg__      \n\t"
+         "Lcpyodd_%=:               \n\t"
+         "  ld __tmp_reg__, Z+      \n\t"
+         "  st X+, __tmp_reg__      \n\t"
+         "Lcpyeven_%=:              \n\t"
+         "  subi %A[num], 2         \n\t"  // low byte -= 2
+         "  brcc Lcpyloop_%=        \n\t"  // no borrow: loop right away
+         "  sbci %B[num], 0         \n\t"  // borrow: take it from the high byte
+         "  brcc Lcpyloop_%=        \n\t"
+         // subi/sbci only accept r16..r31, so the counter must live in a
+         // "d" register pair rather than any "r" register.
+         : [num] "+d" (num)
+         : [src] "r" (src),
+           [dst] "r" (dst)
+         // X and Z are overwritten by the movw instructions and advanced by
+         // the loop.  Listing them also stops the compiler from handing dst
+         // to the asm in r30:r31, where the first movw would destroy it.
+         : "memory", "r26", "r27", "r30", "r31"
+         );
+    return dst;
+}
+/// Copy `num` bytes from `src` to `dst`; the regions may overlap (AVR-only).
+///
+/// When `src` is above `dst` the forward-stepping memcpy8 is used.  Otherwise
+/// both pointers are moved to one past the end of their regions and the bytes
+/// are copied from the last to the first, so an overlapping tail is read
+/// before it is overwritten.
+///
+/// @param dst  start of the destination region
+/// @param src  start of the source region
+/// @param num  number of bytes to move; 0 moves nothing
+/// @return     the original `dst` on both paths, as the standard memmove does
+//__attribute__ ((noinline))
+void * memmove8 ( void * dst, const void* src, uint16_t num )
+{
+    if( src > dst) {
+        // if src > dst then we can use the forward-stepping memcpy8
+        return memcpy8( dst, src, num);
+    } else {
+        // Remember the caller's pointer: dst is advanced below, but the
+        // function must still hand back the start of the destination.
+        void * const result = dst;
+        // if src <= dst then we have to step backward, starting one past the end:
+        dst = (char*)dst + num;
+        src = (const char*)src + num;
+        asm volatile(
+             "  movw r30, %[src]        \n\t"  // Z (r30:r31) = end of src
+             "  movw r26, %[dst]        \n\t"  // X (r26:r27) = end of dst
+             "  sbrs %A[num], 0         \n\t"  // odd count: skip the next rjmp
+             "  rjmp Lmoveven_%=        \n\t"
+             "  rjmp Lmovodd_%=         \n\t"
+             "Lmovloop_%=:              \n\t"
+             "  ld __tmp_reg__, -Z      \n\t"
+             "  st -X, __tmp_reg__      \n\t"
+             "Lmovodd_%=:               \n\t"
+             "  ld __tmp_reg__, -Z      \n\t"
+             "  st -X, __tmp_reg__      \n\t"
+             "Lmoveven_%=:              \n\t"
+             "  subi %A[num], 2         \n\t"  // low byte -= 2
+             "  brcc Lmovloop_%=        \n\t"  // no borrow: loop right away
+             "  sbci %B[num], 0         \n\t"  // borrow: take it from the high byte
+             "  brcc Lmovloop_%=        \n\t"
+             // subi/sbci only accept r16..r31, so the counter must live in
+             // a "d" register pair rather than any "r" register.
+             : [num] "+d" (num)
+             : [src] "r" (src),
+               [dst] "r" (dst)
+             // X and Z are overwritten by the movw instructions and stepped
+             // by the loop; declare them so no operand is allocated there.
+             : "memory", "r26", "r27", "r30", "r31"
+             );
+        return result;
+    }
+}
+#endif /* AVR */
+#if 0
+// TEST / VERIFICATION CODE ONLY BELOW THIS POINT
+#include <Arduino.h>
+#include "lib8tion.h"
+// Print one Serial line of the form "abs(<i>) = <abs8(i)> ".
+void test1abs( int8_t i)
+{
+    Serial.print("abs(");
+    Serial.print(i);
+    Serial.print(") = ");
+    // abs8 is evaluated only after the label has been printed.
+    const int8_t magnitude = abs8(i);
+    Serial.print(magnitude);
+    Serial.println(" ");
+}
+// Print abs8() for every int8_t value, -128 through 127, then halt.
+void testabs()
+{
+    delay(5000);
+    // A 16-bit counter lets the loop include 127: an int8_t counter cannot
+    // step past 127 without wrapping, which is why the old `q != 127` bound
+    // stopped one value short.
+    for( int16_t q = -128; q <= 127; q++) {
+        test1abs((int8_t)q);
+    }
+    // Test finished: spin here forever.
+    for(;;){};
+}
+// Print a table of mul8(row, col) for row and col in 0..20, then halt.
+void testmul8()
+{
+    delay(5000);
+    Serial.println("mul8:");
+    for( byte row = 0; row <= 20; ++row) {
+        // Row label, then one product per column.
+        Serial.print(row);
+        Serial.print(" : ");
+        for( byte col = 0; col <= 20; ++col) {
+            const byte product = mul8( row, col);
+            Serial.print(product);
+            Serial.print(' ');
+        }
+        Serial.println(' ');
+    }
+    Serial.println("done.");
+    // Test finished: spin here forever.
+    while( 1) {
+    }
+}
+// Print two tables, then halt: scale8(row, col) for 0..240 in steps of 10,
+// followed by scale8_video(row, col) for 0..100 in steps of 4.
+void testscale8()
+{
+    delay(5000);
+    Serial.println("scale8:");
+    for( byte row = 0; row <= 240; row += 10) {
+        Serial.print(row);
+        Serial.print(" : ");
+        for( byte col = 0; col <= 240; col += 10) {
+            const byte scaled = scale8( row, col);
+            Serial.print(scaled);
+            Serial.print(' ');
+        }
+        Serial.println(' ');
+    }
+    // Separator line between the two tables.
+    Serial.println(' ');
+    Serial.println("scale8_video:");
+    for( byte row = 0; row <= 100; row += 4) {
+        Serial.print(row);
+        Serial.print(" : ");
+        for( byte col = 0; col <= 100; col += 4) {
+            const byte scaled = scale8_video( row, col);
+            Serial.print(scaled);
+            Serial.print(' ');
+        }
+        Serial.println(' ');
+    }
+    Serial.println("done.");
+    // Test finished: spin here forever.
+    while( 1) {
+    }
+}
+// Print a table of qadd8(row, col) for row and col in 0..240 in steps of 10,
+// then halt.
+void testqadd8()
+{
+    delay(5000);
+    for( byte row = 0; row <= 240; row += 10) {
+        Serial.print(row);
+        Serial.print(" : ");
+        for( byte col = 0; col <= 240; col += 10) {
+            const byte sum = qadd8( row, col);
+            Serial.print(sum);
+            Serial.print(' ');
+        }
+        Serial.println(' ');
+    }
+    Serial.println("done.");
+    // Test finished: spin here forever.
+    while( 1) {
+    }
+}
+// Ten times over: draw four values from random8(), print them, pass them to
+// nscale8x3_video(), and print the three channel values again.  Then halt.
+void testnscale8x3()
+{
+    delay(5000);
+    byte red, green, blue, scale;
+    byte round = 0;
+    while( round < 10) {
+        // Same draw order as the printed argument order.
+        red = random8();
+        green = random8();
+        blue = random8();
+        scale = random8();
+        Serial.print("nscale8x3_video( ");
+        Serial.print(red);
+        Serial.print(", ");
+        Serial.print(green);
+        Serial.print(", ");
+        Serial.print(blue);
+        Serial.print(", ");
+        Serial.print(scale);
+        Serial.print(") = [ ");
+        nscale8x3_video( red, green, blue, scale);
+        Serial.print(red);
+        Serial.print(", ");
+        Serial.print(green);
+        Serial.print(", ");
+        Serial.print(blue);
+        Serial.print("]");
+        Serial.println(' ');
+        round++;
+    }
+    Serial.println("done.");
+    // Test finished: spin here forever.
+    for(;;) {
+    }
+}
+#endif

diff --git a/lib/lib8tion/lib8tion.c b/lib/lib8tion/lib8tion.c new file mode 100644 index 000000000..84b3e9c61 --- /dev/null +++ b/lib/lib8tion/lib8tion.c
@@ -0,0 +1,242 @@
	1	#define FASTLED_INTERNAL
	2	#include <stdint.h>
	3
	4	#define RAND16_SEED 1337 // compile-time initial value given to rand16seed
	5	uint16_t rand16seed = RAND16_SEED; // global 16-bit seed word; NOTE(review): presumably the state behind random8() and friends - confirm in lib8tion.h
	6
	7
	8	// memset8, memcpy8, memmove8:
	9	// optimized avr replacements for the standard "C" library
	10	// routines memset, memcpy, and memmove.
	11	//
	12	// There are two techniques that make these routines
	13	// faster than the standard avr-libc routines.
	14	// First, the loops are unrolled 2X, meaning that
	15	// the average loop overhead is cut in half.
	16	// And second, the compare-and-branch at the bottom
	17	// of each loop decrements the low byte of the
	18	// counter, and if the carry is clear, it branches
	19	// back up immediately. Only if the low byte math
	20	// causes carry do we bother to decrement the high
	21	// byte and check that result for carry as well.
	22	// Results for a 100-byte buffer are 20-40% faster
	23	// than standard avr-libc, at a cost of a few extra
	24	// bytes of code.
	25
	26	#if defined(__AVR__)
	27	//__attribute__ ((noinline))
	28	void * memset8 ( void * ptr, uint8_t val, uint16_t num ) // Stores val into num consecutive bytes starting at ptr and returns ptr (AVR assembly).
	29	{
	30	asm volatile(
	31	" movw r26, %[ptr] \n\t" // X (r26:r27) = ptr
	32	" sbrs %A[num], 0 \n\t" // skip the next rjmp when bit 0 of num is set (odd count)
	33	" rjmp Lseteven_%= \n\t" // even count: go straight to the counter check
	34	" rjmp Lsetodd_%= \n\t" // odd count: do a single store first
	35	"Lsetloop_%=: \n\t" // top of the 2X-unrolled loop
	36	" st X+, %[val] \n\t" // first store of the pair
	37	"Lsetodd_%=: \n\t" // entry point for odd counts
	38	" st X+, %[val] \n\t" // second store of the pair
	39	"Lseteven_%=: \n\t" // entry point for even counts
	40	" subi %A[num], 2 \n\t" // low byte of num -= 2
	41	" brcc Lsetloop_%= \n\t" // no borrow: loop again without touching the high byte
	42	" sbci %B[num], 0 \n\t" // borrow: subtract it from the high byte
	43	" brcc Lsetloop_%= \n\t" // high byte did not underflow: keep looping, otherwise done
	44	: [num] "+r" (num) // read-write operand: the loop consumes the counter
	45	: [ptr] "r" (ptr),
	46	[val] "r" (val)
	47	: "memory" // NOTE(review): r26/r27 are written above but are not listed as clobbers - confirm this is safe
	48	);
	49	return ptr; // ptr itself is never modified; only the copy in X advances
	50	}
	51
	52
	53
	54	//__attribute__ ((noinline))
	55	void * memcpy8 ( void * dst, const void* src, uint16_t num ) // Copies num bytes from src to dst in ascending address order and returns dst (AVR assembly).
	56	{
	57	asm volatile(
	58	" movw r30, %[src] \n\t" // Z (r30:r31) = src
	59	" movw r26, %[dst] \n\t" // X (r26:r27) = dst
	60	" sbrs %A[num], 0 \n\t" // skip the next rjmp when bit 0 of num is set (odd count)
	61	" rjmp Lcpyeven_%= \n\t" // even count: go straight to the counter check
	62	" rjmp Lcpyodd_%= \n\t" // odd count: copy a single byte first
	63	"Lcpyloop_%=: \n\t" // top of the 2X-unrolled loop
	64	" ld __tmp_reg__, Z+ \n\t" // load the next source byte, post-incrementing Z
	65	" st X+, __tmp_reg__ \n\t" // store it, post-incrementing X
	66	"Lcpyodd_%=: \n\t" // entry point for odd counts
	67	" ld __tmp_reg__, Z+ \n\t" // second load of the pair
	68	" st X+, __tmp_reg__ \n\t" // second store of the pair
	69	"Lcpyeven_%=: \n\t" // entry point for even counts
	70	" subi %A[num], 2 \n\t" // low byte of num -= 2
	71	" brcc Lcpyloop_%= \n\t" // no borrow: loop again without touching the high byte
	72	" sbci %B[num], 0 \n\t" // borrow: subtract it from the high byte
	73	" brcc Lcpyloop_%= \n\t" // high byte did not underflow: keep looping, otherwise done
	74	: [num] "+r" (num) // read-write operand: the loop consumes the counter
	75	: [src] "r" (src),
	76	[dst] "r" (dst)
	77	: "memory" // NOTE(review): r26/r27/r30/r31 are written above but are not listed as clobbers - confirm this is safe
	78	);
	79	return dst; // dst itself is never modified; only the copy in X advances
	80	}
	81
	82	//__attribute__ ((noinline))
	83	void * memmove8 ( void * dst, const void* src, uint16_t num ) // Copies num bytes from src to dst, choosing the copy direction from the relative order of the two pointers (AVR assembly).
	84	{
	85	if( src > dst) {
	86	// src is above dst, so the forward-stepping memcpy8 can be used
	87	return memcpy8( dst, src, num);
	88	} else {
	89	// src is at or below dst, so copy backward from the end of both regions:
	90	dst = (char*)dst + num; // one past the last destination byte
	91	src = (char*)src + num; // one past the last source byte
	92	asm volatile(
	93	" movw r30, %[src] \n\t" // Z (r30:r31) = end of src
	94	" movw r26, %[dst] \n\t" // X (r26:r27) = end of dst
	95	" sbrs %A[num], 0 \n\t" // skip the next rjmp when bit 0 of num is set (odd count)
	96	" rjmp Lmoveven_%= \n\t" // even count: go straight to the counter check
	97	" rjmp Lmovodd_%= \n\t" // odd count: copy a single byte first
	98	"Lmovloop_%=: \n\t" // top of the 2X-unrolled loop
	99	" ld __tmp_reg__, -Z \n\t" // pre-decrement Z, then load the source byte
	100	" st -X, __tmp_reg__ \n\t" // pre-decrement X, then store the byte
	101	"Lmovodd_%=: \n\t" // entry point for odd counts
	102	" ld __tmp_reg__, -Z \n\t" // second load of the pair
	103	" st -X, __tmp_reg__ \n\t" // second store of the pair
	104	"Lmoveven_%=: \n\t" // entry point for even counts
	105	" subi %A[num], 2 \n\t" // low byte of num -= 2
	106	" brcc Lmovloop_%= \n\t" // no borrow: loop again without touching the high byte
	107	" sbci %B[num], 0 \n\t" // borrow: subtract it from the high byte
	108	" brcc Lmovloop_%= \n\t" // high byte did not underflow: keep looping, otherwise done
	109	: [num] "+r" (num) // read-write operand: the loop consumes the counter
	110	: [src] "r" (src),
	111	[dst] "r" (dst)
	112	: "memory" // NOTE(review): r26/r27/r30/r31 are written above but are not listed as clobbers - confirm this is safe
	113	);
	114	return dst; // NOTE(review): dst was advanced by num above, so this returns dst + num, unlike the memcpy8 branch which returns the start
	115	}
	116	}
	117
	118	#endif /* AVR */
	119
	120
	121
	122
	123	#if 0
	124	// TEST / VERIFICATION CODE ONLY BELOW THIS POINT
	125	#include <Arduino.h>
	126	#include "lib8tion.h"
	127
	128	void test1abs( int8_t i) // Prints one Serial line of the form "abs(<i>) = <abs8(i)> ".
	129	{
	130	Serial.print("abs("); Serial.print(i); Serial.print(") = "); // label showing the input value
	131	int8_t j = abs8(i); // value under test
	132	Serial.print(j); Serial.println(" "); // result, then end of line
	133	}
	134
	135	void testabs() // Runs test1abs over a range of int8_t values, then halts.
	136	{
	137	delay(5000); // pause before any output is produced
	138	for( int8_t q = -128; q != 127; q++) { // covers -128..126; the loop ends before 127 is passed to test1abs
	139	test1abs(q);
	140	}
	141	for(;;){}; // test finished: spin forever
	142	}
	143
	144
	145	void testmul8() // Prints a table of mul8(r, c) for r and c in 0..20, then halts.
	146	{
	147	delay(5000); // pause before any output is produced
	148	byte r, c; // row and column operands
	149
	150	Serial.println("mul8:"); // table heading
	151	for( r = 0; r <= 20; r += 1) {
	152	Serial.print(r); Serial.print(" : "); // row label
	153	for( c = 0; c <= 20; c += 1) {
	154	byte t;
	155	t = mul8( r, c); // value under test
	156	Serial.print(t); Serial.print(' ');
	157	}
	158	Serial.println(' '); // end of row
	159	}
	160	Serial.println("done.");
	161	for(;;){}; // test finished: spin forever
	162	}
	163
	164
	165	void testscale8() // Prints a scale8 table and then a scale8_video table, then halts.
	166	{
	167	delay(5000); // pause before any output is produced
	168	byte r, c; // row and column operands
	169
	170	Serial.println("scale8:"); // heading of the first table
	171	for( r = 0; r <= 240; r += 10) { // rows 0, 10, ..., 240
	172	Serial.print(r); Serial.print(" : "); // row label
	173	for( c = 0; c <= 240; c += 10) { // columns 0, 10, ..., 240
	174	byte t;
	175	t = scale8( r, c); // value under test
	176	Serial.print(t); Serial.print(' ');
	177	}
	178	Serial.println(' '); // end of row
	179	}
	180
	181	Serial.println(' '); // separator line between the two tables
	182	Serial.println("scale8_video:"); // heading of the second table
	183
	184	for( r = 0; r <= 100; r += 4) { // rows 0, 4, ..., 100
	185	Serial.print(r); Serial.print(" : "); // row label
	186	for( c = 0; c <= 100; c += 4) { // columns 0, 4, ..., 100
	187	byte t;
	188	t = scale8_video( r, c); // value under test
	189	Serial.print(t); Serial.print(' ');
	190	}
	191	Serial.println(' '); // end of row
	192	}
	193
	194	Serial.println("done.");
	195	for(;;){}; // test finished: spin forever
	196	}
	197
	198
	199
	200	void testqadd8() // Prints a table of qadd8(r, c) for r and c in 0..240 in steps of 10, then halts.
	201	{
	202	delay(5000); // pause before any output is produced
	203	byte r, c; // row and column operands
	204	for( r = 0; r <= 240; r += 10) { // rows 0, 10, ..., 240
	205	Serial.print(r); Serial.print(" : "); // row label
	206	for( c = 0; c <= 240; c += 10) { // columns 0, 10, ..., 240
	207	byte t;
	208	t = qadd8( r, c); // value under test
	209	Serial.print(t); Serial.print(' ');
	210	}
	211	Serial.println(' '); // end of row
	212	}
	213	Serial.println("done.");
	214	for(;;){}; // test finished: spin forever
	215	}
	216
	217	void testnscale8x3() // Ten times: prints four random8() values, calls nscale8x3_video on them, and prints r, g, b again; then halts.
	218	{
	219	delay(5000); // pause before any output is produced
	220	byte r, g, b, sc; // three channel values and the scale argument
	221	for( byte z = 0; z < 10; z++) { // ten trials
	222	r = random8(); g = random8(); b = random8(); sc = random8(); // fresh inputs for this trial
	223
	224	Serial.print("nscale8x3_video( "); // echo the call with its inputs
	225	Serial.print(r); Serial.print(", ");
	226	Serial.print(g); Serial.print(", ");
	227	Serial.print(b); Serial.print(", ");
	228	Serial.print(sc); Serial.print(") = [ ");
	229
	230	nscale8x3_video( r, g, b, sc); // call under test; NOTE(review): presumably updates r, g, b in place - confirm in lib8tion.h
	231
	232	Serial.print(r); Serial.print(", "); // values after the call
	233	Serial.print(g); Serial.print(", ");
	234	Serial.print(b); Serial.print("]");
	235
	236	Serial.println(' '); // end of this trial's line
	237	}
	238	Serial.println("done.");
	239	for(;;){}; // test finished: spin forever
	240	}
	241
	242	#endif