本帖最后由 Angelo 于 2014-2-28 18:00 编辑
前两天在做一个软件项目,其中需要进行大量的整数开方运算,即使是 Mega2560,跑起来也有些吃力。AVR 的库文件里确实提供了用于开方的 sqrt() 函数,但它是按 double 类型进行运算的,用来做整数开方会在很大程度上浪费资源。
于是自然而然地想到了内联汇编(inline assembly),并找了一些相关的资料,供大家参考一下:
英文的教程,讲的比较细:http://www.nongnu.org/avr-libc/user-manual/inline_asm.html
中文版教程,特别是其中的汇编语言支持章节: http://www.chipart.cn/doc/caavrgccv.pdf
汇编代码的源头: http://members.chello.nl/j.beentjes3/Ruud/sqrt32avr.htm#Sqrt32R
最后上代码,实测速度为sqrt()的2.5倍,相当给力~~
/*
 * Sqrt32(intRes, longIn1) - integer square root of a 32-bit value, written as
 * avr-gcc inline assembly.
 *
 *   intRes   16-bit integer lvalue that receives the root.
 *   longIn1  32-bit expression to take the root of. It is evaluated exactly
 *            once and is never modified.
 *
 * Expands to a single statement; use it as `Sqrt32(root, value);`.
 *
 * Operand constraints:
 *   - intRes is bound with "d" (r16..r31) because `ldi %B0,...` cannot encode
 *     the lower registers; a plain "r" lets the compiler pick one of them.
 *   - The routine rotates and subtracts operand %1 in place, so it works on a
 *     private copy declared read-write ("+r"). Declaring it input-only would
 *     let the compiler assume the register still holds the caller's value.
 *   - r26/r27 are used as scratch and the status flags are changed, hence the
 *     "r26", "r27" and "cc" clobbers.
 *
 * NOTE(review): the trailing adc pair adds one last carry into the result,
 * which looks like a round-to-nearest step - confirm against the routine this
 * was ported from before relying on the rounding behaviour.
 */
#define Sqrt32(intRes, longIn1) \
do { \
    /* Private working copy: the assembly consumes operand %1. */ \
    unsigned long sq32_work_ = (longIn1); \
    asm volatile ( \
        /* r27:r26 = 0xC000, result = 0x4000; the sub also clears carry. */ \
        "ldi R27,0xc0 \n\t" \
        "clr R26 \n\t" \
        "ldi %B0,0x40 \n\t" \
        "sub %A0,%A0 \n\t" \
        /* Main loop: compare the upper word of the work value with the */ \
        /* result and, when it is not smaller, subtract and OR r27:r26 in. */ \
        "_sq32_1%=: brcs _sq32_2%= \n\t" \
        "cp %C1,%A0 \n\t" \
        "cpc %D1,%B0 \n\t" \
        "brcs _sq32_3%= \n\t" \
        "_sq32_2%=: sub %C1,%A0 \n\t" \
        "sbc %D1,%B0 \n\t" \
        "or %A0,R26 \n\t" \
        "or %B0,R27 \n\t" \
        /* Shift r27:r26 right, XOR it into the result, rotate the work */ \
        /* value left through carry. */ \
        "_sq32_3%=: lsr R27 \n\t" \
        "ror R26 \n\t" \
        "eor %B0,R27 \n\t" \
        "eor %A0,R26 \n\t" \
        "rol %A1 \n\t" \
        "rol %B1 \n\t" \
        "rol %C1 \n\t" \
        "rol %D1 \n\t" \
        /* Keep looping until a 1 (shifted out of r26) reaches bit 0 of %A1. */ \
        "sbrs %A1,0 \n\t" \
        "rjmp _sq32_1%= \n\t" \
        /* Final compare/subtract step; may increment the low result byte. */ \
        "brcs _sq32_4%= \n\t" \
        "cp %A0,%C1 \n\t" \
        "cpc %B0,%D1 \n\t" \
        "brcc _sq32_5%= \n\t" \
        "_sq32_4%=: sbc %B1,R27 \n\t" \
        "sbc %C1,%A0 \n\t" \
        "sbc %D1,%B0 \n\t" \
        "inc %A0 \n\t" \
        /* Shift the remainder once more and add the resulting carry to the */ \
        /* result. NOTE(review): r27 is expected to be 0 here - confirm. */ \
        "_sq32_5%=: lsl %B1 \n\t" \
        "rol %C1 \n\t" \
        "rol %D1 \n\t" \
        "brcs _sq32_6%= \n\t" \
        "cp %A0,%C1 \n\t" \
        "cpc %B0,%D1 \n\t" \
        "_sq32_6%=: adc %A0,R27 \n\t" \
        "adc %B0,R27 \n\t" \
        : \
        "=&d" (intRes), "+r" (sq32_work_) \
        : \
        : \
        "r26", "r27", "cc" \
    ); \
} while (0)
-
/* Benchmark state shared by setup() and loop(). */

/*
 * Root produced by the routine under test. Unsigned because the root of a
 * 32-bit input can be as large as 65535, which does not fit in the signed
 * 16-bit int used on AVR.
 */
unsigned int result;

/* Value whose square root is computed; refreshed on every pass of loop(). */
unsigned long num;

/* Iteration counter of the timing loop. */
int i;

/* millis() reading at the start of a run, then the elapsed milliseconds. */
unsigned long timer;
-
- void setup() {
- srandom(analogRead(0));
- Serial.begin(115200);
- }
-
- void loop() {
- num=random();
- timer=millis();
- for (i=0; i<1000; i++) {
- // result=sqrt(num);
- Sqrt32(result, num);
- }
- timer=millis()-timer;
- Serial.println(timer);
- }
复制代码
|