2018-11-26 00:49:19 +01:00

358 lines
9.4 KiB
C

#ifndef __INC_LIB8TION_MATH_H
#define __INC_LIB8TION_MATH_H
///@ingroup lib8tion
///@defgroup Math Basic math operations
/// Fast, efficient 8-bit math functions specifically
/// designed for high-performance LED programming.
///
/// Because of the AVR(Arduino) and ARM assembly language
/// implementations provided, using these functions often
/// results in smaller and faster code than the equivalent
/// program using plain "C" arithmetic and logic.
///@{
/// add one byte to another, saturating at 0xFF
/// @param i - first byte to add
/// @param j - second byte to add
/// @returns the sum of i & j, capped at 0xFF
LIB8STATIC uint8_t qadd8( uint8_t i, uint8_t j)
{
#if QADD8_C == 1
unsigned int t = i + j;
if( t > 255) t = 255;
return t;
#elif QADD8_AVRASM == 1
asm volatile(
/* First, add j to i, conditioning the C flag */
"add %0, %1 \n\t"
/* Now test the C flag.
If C is clear, we branch around a load of 0xFF into i.
If C is set, we go ahead and load 0xFF into i.
*/
"brcc L_%= \n\t"
"ldi %0, 0xFF \n\t"
"L_%=: "
: "+a" (i)
: "a" (j) );
return i;
#elif QADD8_ARM_DSP_ASM == 1
asm volatile( "uqadd8 %0, %0, %1" : "+r" (i) : "r" (j));
return i;
#else
#error "No implementation for qadd8 available."
#endif
}
/// Add one byte to another, saturating at 0x7F
/// @param i - first byte to add
/// @param j - second byte to add
/// @returns the sum of i & j, capped at 0xFF
LIB8STATIC int8_t qadd7( int8_t i, int8_t j)
{
#if QADD7_C == 1
int16_t t = i + j;
if( t > 127) t = 127;
return t;
#elif QADD7_AVRASM == 1
asm volatile(
/* First, add j to i, conditioning the V flag */
"add %0, %1 \n\t"
/* Now test the V flag.
If V is clear, we branch around a load of 0x7F into i.
If V is set, we go ahead and load 0x7F into i.
*/
"brvc L_%= \n\t"
"ldi %0, 0x7F \n\t"
"L_%=: "
: "+a" (i)
: "a" (j) );
return i;
#elif QADD7_ARM_DSP_ASM == 1
asm volatile( "qadd8 %0, %0, %1" : "+r" (i) : "r" (j));
return i;
#else
#error "No implementation for qadd7 available."
#endif
}
/// subtract one byte from another, saturating at 0x00
/// @returns i - j with a floor of 0
LIB8STATIC uint8_t qsub8( uint8_t i, uint8_t j)
{
#if QSUB8_C == 1
int t = i - j;
if( t < 0) t = 0;
return t;
#elif QSUB8_AVRASM == 1
asm volatile(
/* First, subtract j from i, conditioning the C flag */
"sub %0, %1 \n\t"
/* Now test the C flag.
If C is clear, we branch around a load of 0x00 into i.
If C is set, we go ahead and load 0x00 into i.
*/
"brcc L_%= \n\t"
"ldi %0, 0x00 \n\t"
"L_%=: "
: "+a" (i)
: "a" (j) );
return i;
#else
#error "No implementation for qsub8 available."
#endif
}
/// add one byte to another, with one byte result
LIB8STATIC uint8_t add8( uint8_t i, uint8_t j)
{
#if ADD8_C == 1
int t = i + j;
return t;
#elif ADD8_AVRASM == 1
// Add j to i, period.
asm volatile( "add %0, %1" : "+a" (i) : "a" (j));
return i;
#else
#error "No implementation for add8 available."
#endif
}
/// subtract one byte from another, 8-bit result
LIB8STATIC uint8_t sub8( uint8_t i, uint8_t j)
{
#if SUB8_C == 1
int t = i - j;
return t;
#elif SUB8_AVRASM == 1
// Subtract j from i, period.
asm volatile( "sub %0, %1" : "+a" (i) : "a" (j));
return i;
#else
#error "No implementation for sub8 available."
#endif
}
/// Calculate an integer average of two unsigned
/// 8-bit integer values (uint8_t).
/// Fractional results are rounded down, e.g. avg8(20,41) = 30
LIB8STATIC uint8_t avg8( uint8_t i, uint8_t j)
{
#if AVG8_C == 1
return (i + j) >> 1;
#elif AVG8_AVRASM == 1
asm volatile(
/* First, add j to i, 9th bit overflows into C flag */
"add %0, %1 \n\t"
/* Divide by two, moving C flag into high 8th bit */
"ror %0 \n\t"
: "+a" (i)
: "a" (j) );
return i;
#else
#error "No implementation for avg8 available."
#endif
}
/// Calculate an integer average of two signed 7-bit
/// integers (int8_t)
/// If the first argument is even, result is rounded down.
/// If the first argument is odd, result is result up.
LIB8STATIC int8_t avg7( int8_t i, int8_t j)
{
#if AVG7_C == 1
return ((i + j) >> 1) + (i & 0x1);
#elif AVG7_AVRASM == 1
asm volatile(
"asr %1 \n\t"
"asr %0 \n\t"
"adc %0, %1 \n\t"
: "+a" (i)
: "a" (j) );
return i;
#else
#error "No implementation for avg7 available."
#endif
}
/// Calculate the remainder of one unsigned 8-bit
/// value divided by anoter, aka A % M.
/// Implemented by repeated subtraction, which is
/// very compact, and very fast if A is 'probably'
/// less than M. If A is a large multiple of M,
/// the loop has to execute multiple times. However,
/// even in that case, the loop is only two
/// instructions long on AVR, i.e., quick.
LIB8STATIC uint8_t mod8( uint8_t a, uint8_t m)
{
#if defined(__AVR__)
asm volatile (
"L_%=: sub %[a],%[m] \n\t"
" brcc L_%= \n\t"
" add %[a],%[m] \n\t"
: [a] "+r" (a)
: [m] "r" (m)
);
#else
while( a >= m) a -= m;
#endif
return a;
}
/// Add two numbers, and calculate the modulo
/// of the sum and a third number, M.
/// In other words, it returns (A+B) % M.
/// It is designed as a compact mechanism for
/// incrementing a 'mode' switch and wrapping
/// around back to 'mode 0' when the switch
/// goes past the end of the available range.
/// e.g. if you have seven modes, this switches
/// to the next one and wraps around if needed:
/// mode = addmod8( mode, 1, 7);
/// See 'mod8' for notes on performance.
LIB8STATIC uint8_t addmod8( uint8_t a, uint8_t b, uint8_t m)
{
#if defined(__AVR__)
asm volatile (
" add %[a],%[b] \n\t"
"L_%=: sub %[a],%[m] \n\t"
" brcc L_%= \n\t"
" add %[a],%[m] \n\t"
: [a] "+r" (a)
: [b] "r" (b), [m] "r" (m)
);
#else
a += b;
while( a >= m) a -= m;
#endif
return a;
}
/// 8x8 bit multiplication, with 8 bit result
LIB8STATIC uint8_t mul8( uint8_t i, uint8_t j)
{
#if MUL8_C == 1
return ((int)i * (int)(j) ) & 0xFF;
#elif MUL8_AVRASM == 1
asm volatile(
/* Multiply 8-bit i * 8-bit j, giving 16-bit r1,r0 */
"mul %0, %1 \n\t"
/* Extract the LOW 8-bits (r0) */
"mov %0, r0 \n\t"
/* Restore r1 to "0"; it's expected to always be that */
"clr __zero_reg__ \n\t"
: "+a" (i)
: "a" (j)
: "r0", "r1");
return i;
#else
#error "No implementation for mul8 available."
#endif
}
/// saturating 8x8 bit multiplication, with 8 bit result
/// @returns the product of i * j, capping at 0xFF
LIB8STATIC uint8_t qmul8( uint8_t i, uint8_t j)
{
#if QMUL8_C == 1
int p = ((int)i * (int)(j) );
if( p > 255) p = 255;
return p;
#elif QMUL8_AVRASM == 1
asm volatile(
/* Multiply 8-bit i * 8-bit j, giving 16-bit r1,r0 */
" mul %0, %1 \n\t"
/* If high byte of result is zero, all is well. */
" tst r1 \n\t"
" breq Lnospill_%= \n\t"
/* If high byte of result > 0, saturate low byte to 0xFF */
" ldi %0,0xFF \n\t"
" rjmp Ldone_%= \n\t"
"Lnospill_%=: \n\t"
/* Extract the LOW 8-bits (r0) */
" mov %0, r0 \n\t"
"Ldone_%=: \n\t"
/* Restore r1 to "0"; it's expected to always be that */
" clr __zero_reg__ \n\t"
: "+a" (i)
: "a" (j)
: "r0", "r1");
return i;
#else
#error "No implementation for qmul8 available."
#endif
}
/// take abs() of a signed 8-bit uint8_t
LIB8STATIC int8_t abs8( int8_t i)
{
#if ABS8_C == 1
if( i < 0) i = -i;
return i;
#elif ABS8_AVRASM == 1
asm volatile(
/* First, check the high bit, and prepare to skip if it's clear */
"sbrc %0, 7 \n"
/* Negate the value */
"neg %0 \n"
: "+r" (i) : "r" (i) );
return i;
#else
#error "No implementation for abs8 available."
#endif
}
/// square root for 16-bit integers
/// About three times faster and five times smaller
/// than Arduino's general sqrt on AVR.
LIB8STATIC uint8_t sqrt16(uint16_t x)
{
if( x <= 1) {
return x;
}
uint8_t low = 1; // lower bound
uint8_t hi, mid;
if( x > 7904) {
hi = 255;
} else {
hi = (x >> 5) + 8; // initial estimate for upper bound
}
do {
mid = (low + hi) >> 1;
if ((uint16_t)(mid * mid) > x) {
hi = mid - 1;
} else {
if( mid == 255) {
return 255;
}
low = mid + 1;
}
} while (hi >= low);
return low - 1;
}
///@}
#endif