Ben Bacarisse said:
Joe Pfeiffer said:
I would be very surprised to see a recursive Fibonacci that is both
anywhere near as efficient as the obvious iterative solution
OK:
typedef unsigned long long number;
static number fib_aux(number a, number b, int n)
{
if (n <= 1) return a;
else return fib_aux(b, a+b, n-1);
}
number rfib(int n) { return fib_aux(1, 1, n); }
number ifib(int n)
{
number a = 1, b = 1;
while (n-- > 1) {
number t = a;
a = b;
b = t + b;
}
return a;
}
Compiled separately (to prevent in-lining) from a driver:
#include <stdio.h>
#include <stdlib.h>
typedef unsigned long long number;
number rfib(int);
number ifib(int);
int main(int argc, char **argv)
{
int n = argc > 1 ? atoi(argv[1]) : 10;
number sdiff = 0;
for (int i = 1; i <= n; i++)
sdiff += rfib(i) - ifib(i);
printf("%lld\n", sdiff);
return 0;
}
Here is the gprof output from running this program with argv[1] 100000
(even with unsigned long long, you are not actually getting the right
numbers, but it's hard to test otherwise).
Flat profile:
Each sample counts as 0.01 seconds.
% cumulative self self total
time seconds seconds calls us/call us/call name
52.69 3.40 3.40 100000 34.04 34.04 ifib
48.32 6.53 3.12 100000 31.21 31.21 rfib
I will admit to picking the best of three to make a point, but in all
cases ifib and rfib were timed at essentially the same speed -- which
comes out faster is in statistical noise. (Compiled with -O2, though it
may depend on compiler version.)
Looking at the code, the first thing that went through my mind was
"they should compile to essentially the same thing".
I don't know what they do on your platform, but GCC on POWER supports
that rather well:
00000000 <rfib>:
typedef unsigned long long number;
static number fib_aux(number a, number b, int n)
{
if (n <= 1) return a;
0: 2f 83 00 01 cmpwi cr7,r3,1
4: 39 20 00 00 li r9,0
8: 39 40 00 01 li r10,1
c: 40 9d 00 40 ble- cr7,4c <rfib+0x4c>
10: 38 63 ff ff addi r3,r3,-1
14: 39 60 00 00 li r11,0
18: 7c 69 03 a6 mtctr r3
1c: 39 80 00 01 li r12,1
20: 48 00 00 20 b 40 <rfib+0x40>
24: 60 00 00 00 nop
28: 60 00 00 00 nop
2c: 60 00 00 00 nop
30: 7d 4c 53 78 mr r12,r10
34: 7d 2b 4b 78 mr r11,r9
38: 7d 0a 43 78 mr r10,r8
3c: 7c e9 3b 78 mr r9,r7
else return fib_aux(b, a+b, n-1);
40: 7d 0a 60 14 addc r8,r10,r12
44: 7c e9 59 14 adde r7,r9,r11
typedef unsigned long long number;
static number fib_aux(number a, number b, int n)
{
if (n <= 1) return a;
48: 42 00 ff e8 bdnz+ 30 <rfib+0x30>
else return fib_aux(b, a+b, n-1);
}
number rfib(int n) { return fib_aux(1, 1, n); }
4c: 7d 44 53 78 mr r4,r10
50: 7d 23 4b 78 mr r3,r9
54: 4e 80 00 20 blr
58: 60 00 00 00 nop
5c: 60 00 00 00 nop
00000060 <ifib>:
number ifib(int n)
{
number a = 1, b = 1;
while (n-- > 1) {
60: 2f 83 00 01 cmpwi cr7,r3,1
64: 39 20 00 00 li r9,0
68: 39 40 00 01 li r10,1
6c: 40 9d 00 40 ble- cr7,ac <ifib+0x4c>
70: 38 63 ff ff addi r3,r3,-1
74: 39 60 00 00 li r11,0
78: 7c 69 03 a6 mtctr r3
7c: 39 80 00 01 li r12,1
80: 48 00 00 18 b 98 <ifib+0x38>
84: 60 00 00 00 nop
88: 60 00 00 00 nop
8c: 60 00 00 00 nop
90: 7d 0a 43 78 mr r10,r8
94: 7c e9 3b 78 mr r9,r7
number t = a;
a = b;
b = t + b;
98: 7d 0a 60 14 addc r8,r10,r12
9c: 7c e9 59 14 adde r7,r9,r11
number rfib(int n) { return fib_aux(1, 1, n); }
number ifib(int n)
{
number a = 1, b = 1;
while (n-- > 1) {
a0: 7d 4c 53 78 mr r12,r10
a4: 7d 2b 4b 78 mr r11,r9
a8: 42 00 ff e8 bdnz+ 90 <ifib+0x30>
number t = a;
a = b;
b = t + b;
}
return a;
}
ac: 7d 44 53 78 mr r4,r10
b0: 7d 23 4b 78 mr r3,r9
b4: 4e 80 00 20 blr
A POWER machine (running debian stable) is far more metronomic
than many other machines, and the lack of statistical noise is
quite interesting:
% cumulative self self total
time seconds seconds calls us/call us/call name
50.24 12.60 12.60 100000 126.00 126.00 ifib
49.76 25.08 12.48 100000 124.80 124.80 rfib
50.28 12.61 12.61 100000 126.10 126.10 ifib
49.72 25.08 12.47 100000 124.70 124.70 rfib
50.22 12.60 12.60 100000 126.00 126.00 rfib
49.78 25.09 12.49 100000 124.90 124.90 ifib
I think the recursive function makes the rule that defines the sequence
much clearer than the loop does, but I accept that that may be a matter
of opinion. That aside, I don't think you can say that it's not
"remotely near as clear" as the alternatives.
Your recursive function would be my method of choice, thanks for going
to the effort of selling tail recursion so effectively.
Phil