Programming tools for your new processor
Compiler-produced FFT assembly code
; Archelon URCC C 3.17 2003/08/11 MCSYS = ucode
; - fft.c at Wed Aug 13 13:14:07 2003
; struct complex { float rp, ip; } ;
; struct complex z[ 256 +1], w[ 256 +1],
; e[ 129 +1];
; float zr, zi;
; extern int seed;
; float Cos (x) float x;
.segment fft_code,memtype=0,locinc=1,rom
.global _Cos
; ----------------------------------------------------------------------
; float Cos(x)
; Approximates cos(x) with the truncated power series
;   1 - x^2/2! + x^4/4! - x^6/6! + x^8/8! - x^10/10!
; accumulated term by term for i = 2..10; odd i only update the running
; factorial and power.
;
; In:    R24 = x (converted with cvtdf into R1 on entry)
; Out:   R24 = result
; Uses:  R1 = x, R25 = i, R26 = factor, R27 = power, R0 = scratch
; Frame: R31 is advanced on entry and rewound on exit (stack pointer),
;        R30 is the frame pointer; R0 and R1 are saved at R30+0 / R30+4
;        and restored before returning.
;
; NOTE(review): the compiler annotates factor as "size 2" and updates it
; with mul_w. If that is a 16-bit signed quantity, 8! = 40320 no longer
; fits -- confirm against the target's int width.
;
; The two movi_w writes to R0 that preceded the (i & 1) test were dead
; (R0 is overwritten by the andi_w before being read) and are omitted.
; ----------------------------------------------------------------------
_Cos:
addi_l R31,0x4,R31
strri_l R30,R31,-0x4
mov_l R31,R30
addi_l R31,0x8,R31
strri_l R0,R30,0x0
strri_l R1,R30,0x4
; ENTRY
cvtdf R24,R1
; x at argloc 0 (0x0) in reg size 4
; i in reg size 2
; factor in reg size 2
; result in reg size 4
; power in reg size 4
; {
; int i, factor;
; float result,power;
; result = 1.0; factor = 1; power = x;
ldzi_f __fconsf1,R24
movi_w 0x1,R26
; for ( i = 2; i <= 10; i++ ) {
movi_w 0x2,R25
mov_l R1,R27
Cos_L1:
; factor = factor * i; power = power*x;
mul_w R26,R25,R26
mul_f R27,R1,R27
; if ( (i & 1) == 0 ) {
; odd i: skip the accumulation and go straight to the loop increment
andi_w R25,0x1,R0
cmpi_w R0,0x0
bne Cos_L2
; if ( (i & 3) == 0 ) result = result + power/factor;
; i = 4, 8: term is added
andi_w R25,0x3,R0
cmpi_w R0,0x0
bne Cos_L3
cvtwf R26,R0
div_f R27,R0,R0
add_f R0,R24,R24
br Cos_L4
Cos_L3:
; else result = result - power/factor;
; i = 2, 6, 10: term is subtracted
cvtwf R26,R0
div_f R27,R0,R0
sub_f R24,R0,R24
Cos_L4:
; }
Cos_L2:
addi_w R25,0x1,R25
cmpi_w R25,0xa
ble Cos_L1
; }
; return (result);
; result is already in R24; only the saved registers need restoring
; EXIT
; .temp0 at stkloc 0 (0x0) size 8
ldri_l R30,0x0,R0
ldri_l R30,0x4,R1
mov_l R30,R31
ldri_l R31,-0x4,R30
subi_l R31,0x4,R31
ret
; }
; int Min0( arg1, arg2) int arg1, arg2;
; ----------------------------------------------------------------------
; Returns the smaller of two integers.
; In:    R24 = arg1, R25 = arg2
; Out:   R24 = arg1 if arg1 < arg2, otherwise arg2
; Frame: only the caller's R30 is saved; no other registers are written
;        besides R24.
; ----------------------------------------------------------------------
.global _Min0
_Min0:
addi_l R31,0x4,R31
strri_l R30,R31,-0x4
mov_l R31,R30
; ENTRY
; arg1 at argloc 0 (0x0) in reg size 2
; arg2 at argloc 0 (0x0) in reg size 2
; {
; if ( arg1 < arg2 )
cmp_w R24,R25
bge Min0_L1
; return (arg1);
; arg1 is already in the return register, so just jump to the exit
br Min0_LL1
Min0_L1:
; else
; return (arg2);
mov_w R25,R24
Min0_LL1:
; EXIT
mov_l R30,R31
ldri_l R31,-0x4,R30
subi_l R31,0x4,R31
ret
; }
; void
; Printcomplex( arg1, arg2, zarray, start, finish, increment)
; int arg1, arg2, start, finish, increment;
; struct complex zarray[];
; ----------------------------------------------------------------------
; Prints entries of zarray two per line, stepping the index by
; `increment` after each entry, until the index exceeds `finish`.
; The loop is a do/while, so at least two entries are always printed.
;
; In:    R24 = arg1 (unused), R25 = arg2 (unused),
;        R26 = zarray, R27 = start,
;        finish at R30-6 and increment at R30-8 (passed on the stack)
; Uses:  R1 = zarray, R0 = i, R2 = increment; R24/R25/R27 are scratch and
;        outgoing printf arguments.
; Frame: R0, R1, R2 and R28 are saved at R30+0..0xc and restored on exit.
; Each element is 8 bytes (index shifted left by 3): rp at +0, ip at +4.
; ----------------------------------------------------------------------
.global _Printcomplex
_Printcomplex:
addi_l R31,0x4,R31
strri_l R30,R31,-0x4
mov_l R31,R30
addi_l R31,0x10,R31
strri_l R0,R30,0x0
strri_l R1,R30,0x4
strri_l R2,R30,0x8
strri_l R28,R30,0xc
; ENTRY
mov_l R26,R1
; zarray at argloc 0 (0x0) in reg size 4
mov_w R27,R0
; start at argloc 0 (0x0) in reg size 2
; finish at argloc -6 (0xfffffffa) size 2
ldri_w R30,-0x8,R2
; increment at argloc -8 (0xfffffff8) in reg size 2
; i in reg size 2
; {
; int i;
; printf("\n") ;
ldzi_l __pcons1,R24
call _printf
; i = start;
; (start was already copied into R0 above, so no instruction is needed)
; do {
Printcomplex_L2:
; printf(" %15.3e%15.3e",zarray[i].rp,zarray[i].ip) ;
; 8 bytes of stack are reserved for the second double argument; the
; format pointer goes in R24 and the first double in R25.
addi_l R31,0x8,R31
ldzi_l __pcons2,R24
cvtswl R0,R25
llsi_l R25,0x3,R25
add_l R25,R1,R27
ldri_f R27,0x0,R25
cvtfd R25,R25
ldri_f R27,0x4,R27
cvtfd R27,R27
strri_d R27,R31,-0x8
call _printf
subi_l R31,0x8,R31
; i = i + increment;
add_w R0,R2,R0
; printf(" %15.3e%15.3e",zarray[i].rp,zarray[i].ip) ;
addi_l R31,0x8,R31
ldzi_l __pcons2,R24
cvtswl R0,R25
llsi_l R25,0x3,R25
add_l R25,R1,R27
ldri_f R27,0x0,R25
cvtfd R25,R25
ldri_f R27,0x4,R27
cvtfd R27,R27
strri_d R27,R31,-0x8
call _printf
subi_l R31,0x8,R31
; printf("\n");
ldzi_l __pcons1,R24
call _printf
; i = i + increment ;
add_w R0,R2,R0
; finish is reloaded from its stack slot on every iteration
ldri_w R30,-0x6,R24
cmp_w R0,R24
ble Printcomplex_L2
Printcomplex_L1:
; } while ( i <= finish );
; EXIT
; .temp1 at stkloc 0 (0x0) size 16
ldri_l R30,0x0,R0
ldri_l R30,0x4,R1
ldri_l R30,0x8,R2
ldri_l R30,0xc,R28
mov_l R30,R31
ldri_l R31,-0x4,R30
subi_l R31,0x4,R31
ret
; }
; ****** Warning: Symbol ``arg1'' (function Printcomplex) was defined, but not used
; ****** Warning: Symbol ``arg2'' (function Printcomplex) was defined, but not used
; void
; Uniform11(iy, yfl) int iy; float yfl;
; ----------------------------------------------------------------------
; One step of the generator iy = (4855*iy + 1731) & 8191, followed by
; yfl = iy/8192.0.
;
; In:    R24 = iy, R25 = yfl (converted with cvtdf on entry)
; Out:   none (void). The new iy is left in R26 and the quotient in R24,
;        but this routine performs no store to memory other than saving
;        R30, so neither value is written back to any variable here.
; NOTE(review): both parameters are passed by value in the C source, so
;        the caller's variables are not updated by this call -- confirm
;        that this is the intended behaviour of the benchmark.
; ----------------------------------------------------------------------
.global _Uniform11
_Uniform11:
addi_l R31,0x4,R31
strri_l R30,R31,-0x4
mov_l R31,R30
; ENTRY
; iy at argloc 0 (0x0) in reg size 2
cvtdf R25,R25
; yfl at argloc 0 (0x0) in reg size 4
; {
; iy = (4855*iy + 1731) & 8191;
; 0x12f7 = 4855, 0x6c3 = 1731, 0x1fff = 8191
muli_w R24,0x12f7,R24
addi_w R24,0x6c3,R24
andi_w R24,0x1fff,R26
; yfl = iy/8192.0;
; the division is carried out in double precision, then narrowed
ldzi_d __dcons3,R24
cvtwd R26,R26
div_d R26,R24,R24
cvtdf R24,R24
; EXIT
mov_l R30,R31
ldri_l R31,-0x4,R30
subi_l R31,0x4,R31
ret
; }
; void
; Exptab(n, e) int n; struct complex e[];
; ----------------------------------------------------------------------
; Fills the table e[] used by Fft.
;   1. h[i] = 1/(2*Cos(theta/divisor)) for i = 1..25, with theta set to
;      3.1415926536 and divisor starting at 4.0 and doubling each pass.
;   2. Seeds e[1] = (1,0), e[l+1] = (0,1), e[m+1] = (-1,0) where m = n/2
;      and l = m/2.
;   3. Repeatedly halves l, filling e[k+1] from its neighbours e[k+i+1]
;      and e[k-i+1] scaled by h[j], until l <= 1.
;
; In:    R24 = n, R25 = e
; Register roles in step 1: R0 = i, R2 = divisor, R3 = theta.
; Register roles in steps 2-3: R1 = e, R6 = m, R26 = l, R24 = j, R0 = i,
;        R25 = k, R27 = h[j]; R2-R5 are address/value scratch.
;        (R4 first holds n and is reused as scratch once m is computed.)
; Frame: h[26] occupies R30+0 .. R30+0x67 (4 bytes per entry);
;        R0-R6 are saved at R30+0x68 .. R30+0x80 and restored on exit.
; Calls: _Cos (argument widened with cvtfd, result used from R24) and
;        _Min0 (arguments in R24/R25, result used from R24).
; ----------------------------------------------------------------------
.global _Exptab
_Exptab:
addi_l R31,0x4,R31
strri_l R30,R31,-0x4
mov_l R31,R30
addi_l R31,0x84,R31
strri_l R0,R30,0x68
strri_l R1,R30,0x6c
strri_l R2,R30,0x70
strri_l R3,R30,0x74
strri_l R4,R30,0x78
strri_l R5,R30,0x7c
strri_l R6,R30,0x80
; ENTRY
mov_w R24,R4
; n at argloc 0 (0x0) in reg size 2
mov_l R25,R1
; e at argloc 0 (0x0) in reg size 4
; theta in reg size 4
; divisor in reg size 4
; h at stkloc 0 (0x0) size 104
; i in reg size 2
; j in reg size 2
; k in reg size 2
; l in reg size 2
; m in reg size 2
; {
; float theta, divisor, h[26];
; int i, j, k, l, m;
; theta = 3.1415926536;
ldzi_f __fcons5,R3
; divisor = 4.0;
ldzi_f __fcons6,R2
; for ( i=1; i <= 25; i++ )
movi_w 0x1,R0
Exptab_L1:
; {
; h[i] = 1/(2*Cos( theta/divisor ));
div_f R3,R2,R24
cvtfd R24,R24
call _Cos
; R25 = 2.0 * Cos(...), then R25 = 1.0 / R25
ldzi_f __fconsf2,R25
mul_f R24,R25,R25
ldzi_f __fconsf1,R24
div_f R24,R25,R25
; store at frame offset i*4, i.e. h[i]
cvtswl R0,R24
llsi_l R24,0x2,R24
strrr_f R25,R30,R24
; divisor = divisor + divisor;
add_f R2,R2,R2
addi_w R0,0x1,R0
cmpi_w R0,0x19
ble Exptab_L1
; }
; m = n / 2 ;
divi_w R4,0x2,R6
; l = m / 2 ;
divi_w R6,0x2,R26
; j = 1 ;
movi_w 0x1,R24
; e[1].rp = 1.0 ;
ldzi_f __fconsf1,R0
strri_f R0,R1,0x8
; e[1].ip = 0.0;
ldzi_f __fconsf0,R25
strri_f R25,R1,0xc
; e[l+1].rp = 0.0;
; R25 still holds 0.0 and R0 still holds 1.0 from the stores above
addi_w R26,0x1,R27
cvtswl R27,R27
llsi_l R27,0x3,R27
add_l R27,R1,R27
strri_f R25,R27,0x0
; e[l+1].ip = 1.0 ;
strri_f R0,R27,0x4
; e[m+1].rp = -1.0 ;
addi_w R6,0x1,R27
cvtswl R27,R27
llsi_l R27,0x3,R27
add_l R27,R1,R27
ldzi_f __fcons4,R0
strri_f R0,R27,0x0
; e[m+1].ip = 0.0 ;
strri_f R25,R27,0x4
; do {
Exptab_L3:
; i = l / 2 ;
divi_w R26,0x2,R0
; k = i ;
mov_l R0,R25
; do {
Exptab_L5:
; e[k+1].rp = h[j]*(e[k+i+1].rp+e[k-i+1].rp) ;
; R27 = h[j]
cvtswl R24,R27
llsi_l R27,0x2,R27
ldrr_f R30,R27,R27
; R3 = &e[k-i+1], R2 = its rp
sub_w R25,R0,R2
addi_w R2,0x1,R2
cvtswl R2,R2
llsi_l R2,0x3,R2
add_l R2,R1,R3
ldri_f R3,0x0,R2
; R4 = &e[k+i+1], R5 = its rp
add_w R25,R0,R4
addi_w R4,0x1,R4
cvtswl R4,R4
llsi_l R4,0x3,R4
add_l R4,R1,R4
ldri_f R4,0x0,R5
add_f R5,R2,R2
mul_f R27,R2,R2
; R5 = &e[k+1]
addi_w R25,0x1,R5
cvtswl R5,R5
llsi_l R5,0x3,R5
add_l R5,R1,R5
strri_f R2,R5,0x0
; e[k+1].ip = h[j]*(e[k+i+1].ip+e[k-i+1].ip) ;
; reuses the three element addresses computed for the rp case
ldri_f R3,0x4,R2
ldri_f R4,0x4,R3
add_f R3,R2,R2
mul_f R27,R2,R27
strri_f R27,R5,0x4
; k = k+l ;
add_w R25,R26,R25
cmp_w R25,R6
ble Exptab_L5
Exptab_L4:
; } while ( k <= m );
; j = Min0( j+1, 25);
addi_w R24,0x1,R24
movi_w 0x19,R25
call _Min0
; l = i ;
; the compare is issued before the copy; the branch below tests i > 1,
; which equals the new l > 1
cmpi_w R0,0x1
mov_l R0,R26
bgt Exptab_L3
Exptab_L2:
; } while ( l > 1 );
; EXIT
; .temp2 at stkloc 104 (0x68) size 28
ldri_l R30,0x68,R0
ldri_l R30,0x6c,R1
ldri_l R30,0x70,R2
ldri_l R30,0x74,R3
ldri_l R30,0x78,R4
ldri_l R30,0x7c,R5
ldri_l R30,0x80,R6
mov_l R30,R31
ldri_l R31,-0x4,R30
subi_l R31,0x4,R31
ret
; }
; void
; Fft( n, z, w, e, sqrinv)
; int n; struct complex z[], w[]; struct complex e[]; float sqrinv;
; ----------------------------------------------------------------------
; Transform of the n complex values z[1..n], using w[] as work space and
; e[] as the precomputed coefficient table. Each outer pass writes
; sums and coefficient-weighted differences of z[i] and z[i+m] into w,
; copies w[1..n] back to z, and doubles l until l > m (m = n/2).
; Finally every z[i].rp is multiplied by sqrinv and every z[i].ip by
; -sqrinv.
;
; In:    R24 = n, R25 = z, R26 = w, R27 = e,
;        sqrinv as a double on the stack at R30-0xc (narrowed into R7)
; Register roles in the main loops:
;        R13 = m, R1 = l, R3 = k, R2 = j, R0 = i (later: index),
;        R8 = &z[m+i], R10 = &z[i], R11 = &e[k+1],
;        R4/R5/R6/R9/R12 = scratch
; In the final scaling loop R26 is reused as i and R27 as &z[i]
;        (w and e are no longer needed at that point).
; Frame: R0-R13 are saved at R30+0 .. R30+0x34 and restored on exit.
; All three arrays use 8-byte elements (index << 3): rp at +0, ip at +4.
;
; A movi_w 0x1,R4 that preceded the computation of k+1 was dead (R4 is
; overwritten by the addi_w before being read) and is omitted.
; ----------------------------------------------------------------------
.global _Fft
_Fft:
addi_l R31,0x4,R31
strri_l R30,R31,-0x4
mov_l R31,R30
addi_l R31,0x38,R31
strri_l R0,R30,0x0
strri_l R1,R30,0x4
strri_l R2,R30,0x8
strri_l R3,R30,0xc
strri_l R4,R30,0x10
strri_l R5,R30,0x14
strri_l R6,R30,0x18
strri_l R7,R30,0x1c
strri_l R8,R30,0x20
strri_l R9,R30,0x24
strri_l R10,R30,0x28
strri_l R11,R30,0x2c
strri_l R12,R30,0x30
strri_l R13,R30,0x34
; ENTRY
; n at argloc 0 (0x0) in reg size 2
; z at argloc 0 (0x0) in reg size 4
; w at argloc 0 (0x0) in reg size 4
; e at argloc 0 (0x0) in reg size 4
ldri_d R30,-0xc,R0
cvtdf R0,R7
; sqrinv at argloc -12 (0xfffffff4) in reg size 4
; i in reg size 2
; j in reg size 2
; k in reg size 2
; l in reg size 2
; m in reg size 2
; index in reg size 2
; {
; int i, j, k, l, m, index;
; m = n / 2 ;
divi_w R24,0x2,R13
; l = 1 ;
movi_w 0x1,R1
; do {
Fft_L2:
; k = 0 ;
movi_w 0x0,R3
; j = l ;
; i = 1 ;
movi_w 0x1,R0
mov_l R1,R2
; do {
Fft_L4:
; do {
Fft_L6:
; w[i+k].rp = z[i].rp+z[m+i].rp ;
; R8 = &z[m+i], R10 = &z[i]; both are kept for the rest of the body
add_w R13,R0,R4
cvtswl R4,R4
llsi_l R4,0x3,R4
add_l R4,R25,R8
ldri_f R8,0x0,R5
cvtswl R0,R4
llsi_l R4,0x3,R4
add_l R4,R25,R10
ldri_f R10,0x0,R4
add_f R4,R5,R5
; R4 = &w[i+k]
add_w R0,R3,R4
cvtswl R4,R4
llsi_l R4,0x3,R4
add_l R4,R26,R4
strri_f R5,R4,0x0
; w[i+k].ip = z[i].ip+z[m+i].ip ;
ldri_f R8,0x4,R5
ldri_f R10,0x4,R6
add_f R6,R5,R5
strri_f R5,R4,0x4
; w[i+j].rp = e[k+1].rp*(z[i].rp-z[i+m].rp)
; R11 = &e[k+1], R9 = e[k+1].ip
addi_w R3,0x1,R4
cvtswl R4,R4
llsi_l R4,0x3,R4
add_l R4,R27,R11
ldri_f R11,0x4,R9
; R4 = z[i].ip - z[i+m].ip (kept for the ip result below)
ldri_f R8,0x4,R5
ldri_f R10,0x4,R4
sub_f R4,R5,R4
mul_f R9,R4,R5
; R6 = e[k+1].rp * (z[i].rp - z[i+m].rp), minus the product in R5
ldri_f R8,0x0,R6
ldri_f R10,0x0,R12
sub_f R12,R6,R6
ldri_f R11,0x0,R12
mul_f R12,R6,R6
sub_f R6,R5,R6
; R5 = &w[i+j]
add_w R0,R2,R5
cvtswl R5,R5
llsi_l R5,0x3,R5
add_l R5,R26,R5
strri_f R6,R5,0x0
; -e[k+1].ip*(z[i].ip-z[i+m].ip) ;
; w[i+j].ip = e[k+1].rp*(z[i].ip-z[i+m].ip)
; R8 is reused as a value register from here on; its address role
; is re-established at the top of the next iteration
ldri_f R8,0x0,R6
ldri_f R10,0x0,R8
sub_f R8,R6,R6
mul_f R9,R6,R6
ldri_f R11,0x0,R8
mul_f R8,R4,R4
add_f R4,R6,R4
strri_f R4,R5,0x4
; +e[k+1].ip*(z[i].rp-z[i+m].rp) ;
; i = i+1 ;
addi_w R0,0x1,R0
cmp_w R0,R2
ble Fft_L6
Fft_L5:
; } while ( i <= j );
; k = j ;
; j = k+l ;
; the new j is compared with m first; the two copies that follow
; update k and j before the branch is taken
add_w R2,R1,R4
cmp_w R4,R13
mov_l R2,R3
mov_l R4,R2
ble Fft_L4
Fft_L3:
; } while ( j <= m );
; index = 1;
movi_w 0x1,R0
; do {
Fft_L8:
; z[index] = w[index];
; the 8-byte element is moved with a single ldri_d/strri_d pair
cvtswl R0,R2
llsi_l R2,0x3,R2
add_l R2,R26,R3
ldri_d R3,0x0,R3
add_l R2,R25,R2
strri_d R3,R2,0x0
; index = index+1;
addi_w R0,0x1,R0
cmp_w R0,R24
ble Fft_L8
Fft_L7:
; } while ( index <= n );
; l = l+l ;
add_w R1,R1,R1
cmp_w R1,R13
ble Fft_L2
Fft_L1:
; } while ( l <= m );
; for ( i = 1; i <= n; i++ )
; pre-test: the loop body is skipped entirely when n < 1
movi_w 0x1,R26
cmp_w R26,R24
bgt Fft_L9
Fft_L10:
; {
; z[i].rp = sqrinv*z[i].rp ;
cvtswl R26,R27
llsi_l R27,0x3,R27
add_l R27,R25,R27
ldri_f R27,0x0,R0
mul_f R0,R7,R0
strri_f R0,R27,0x0
; z[i].ip = -sqrinv*z[i].ip;
ldri_f R27,0x4,R0
neg_f R7,R1
mul_f R1,R0,R0
strri_f R0,R27,0x4
addi_w R26,0x1,R26
cmp_w R26,R24
ble Fft_L10
Fft_L9:
; }
; EXIT
; .temp3 at stkloc 0 (0x0) size 56
ldri_l R30,0x0,R0
ldri_l R30,0x4,R1
ldri_l R30,0x8,R2
ldri_l R30,0xc,R3
ldri_l R30,0x10,R4
ldri_l R30,0x14,R5
ldri_l R30,0x18,R6
ldri_l R30,0x1c,R7
ldri_l R30,0x20,R8
ldri_l R30,0x24,R9
ldri_l R30,0x28,R10
ldri_l R30,0x2c,R11
ldri_l R30,0x30,R12
ldri_l R30,0x34,R13
mov_l R30,R31
ldri_l R31,-0x4,R30
subi_l R31,0x4,R31
ret
; }
; void
; Oscar()
; ----------------------------------------------------------------------
; Benchmark driver: builds the coefficient table with Exptab(256, e),
; sets seed to 5767, fills z[1..256] from zr/zi, then runs
; Fft(256, z, w, e, 0.0625) twenty times.
;
; In/Out: none.
; Uses:  R0 = i (both loops), R1 = the 0.0625 double pushed for Fft,
;        R24-R27 = outgoing arguments and scratch.
; Frame: R0, R1, R2 and R28 are saved at R30+0..0xc and restored on exit.
; Reads the globals _seed, _zr, _zi; writes _seed and z[].
; NOTE(review): zr and zi are only ever loaded in this routine, never
;        stored; whether anything else updates them cannot be seen from
;        this file -- confirm.
; ----------------------------------------------------------------------
.global _Oscar
_Oscar:
addi_l R31,0x4,R31
strri_l R30,R31,-0x4
mov_l R31,R30
addi_l R31,0x10,R31
strri_l R0,R30,0x0
strri_l R1,R30,0x4
strri_l R2,R30,0x8
strri_l R28,R30,0xc
; ENTRY
; i in reg size 2
; {
; int i;
; Exptab( 256 ,e) ;
movi_w 0x100,R24
ldzi_l __pcons10,R25
call _Exptab
; seed = 5767 ;
; 0x1687 = 5767
stizi_w 0x1687,_seed
; for ( i = 1; i <= 256 ; i++ )
movi_w 0x1,R0
Oscar_L1:
; {
; Uniform11( seed, zr );
; the float argument is widened to double before the call
ldzi_w _seed,R24
ldzi_f _zr,R25
cvtfd R25,R25
call _Uniform11
; Uniform11( seed, zi );
ldzi_w _seed,R24
ldzi_f _zi,R25
cvtfd R25,R25
call _Uniform11
; z[i].rp = 20.0*zr - 10.0;
; computed in double precision (__dcons7 = 20.0, __dcons8 = 10.0),
; then narrowed to float for the store
ldzi_f _zr,R24
cvtfd R24,R24
ldzi_d __dcons7,R26
mul_d R24,R26,R24
ldzi_d __dcons8,R26
sub_d R24,R26,R24
cvtdf R24,R25
; R24 = i*8, the byte offset of z[i]; reused for the ip store below
cvtswl R0,R24
llsi_l R24,0x3,R24
ldzi_l __pcons11,R26
add_l R24,R26,R26
strri_f R25,R26,0x0
; z[i].ip = 20.0*zi - 10.0;
ldzi_f _zi,R25
cvtfd R25,R25
ldzi_d __dcons7,R27
mul_d R25,R27,R27
ldzi_d __dcons8,R25
sub_d R27,R25,R25
cvtdf R25,R25
; __pcons12 holds _z+4, so offset 0 from it addresses the ip field
ldzi_l __pcons12,R26
add_l R24,R26,R24
strri_f R25,R24,0x0
addi_w R0,0x1,R0
cmpi_w R0,0x100
ble Oscar_L1
; }
; for ( i = 1; i <= 20; i++ ) {
movi_w 0x1,R0
Oscar_L2:
; Fft( 256 ,z,w,e,0.0625) ;
; n, z, w, e go in R24-R27; sqrinv is passed as a double in 8 bytes
; of stack reserved just for the call
addi_l R31,0x8,R31
movi_w 0x100,R24
ldzi_l __pcons11,R25
ldzi_l __pcons13,R26
ldzi_l __pcons10,R27
ldzi_d __dcons9,R1
strri_d R1,R31,-0x8
call _Fft
subi_l R31,0x8,R31
addi_w R0,0x1,R0
cmpi_w R0,0x14
ble Oscar_L2
; }
; EXIT
; .temp4 at stkloc 0 (0x0) size 16
ldri_l R30,0x0,R0
ldri_l R30,0x4,R1
ldri_l R30,0x8,R2
ldri_l R30,0xc,R28
mov_l R30,R31
ldri_l R31,-0x4,R30
subi_l R31,0x4,R31
ret
; }
; ----------------------------------------------------------------------
; Uninitialised storage for the file-scope variables declared in fft.c.
; Sizes follow the C declarations: a struct complex is two floats
; (8 bytes), so e[130] is 1040 bytes and w[257] / z[257] are 2056 each.
; fft_V19 labels the start of the segment and is referenced from _idesc.
; ----------------------------------------------------------------------
.segment fft_bss,memtype=1,locinc=1,wordsize=8
fft_V19:
.global _zi
_zi:
.bss 4
; zi size 4
.global _e
_e:
.bss 1040
; e size 1040
.global _zr
_zr:
.bss 4
; zr size 4
.global _w
_w:
.bss 2056
; w size 2056
.global _z
_z:
.bss 2056
; z size 2056
; symbols defined outside this file
.extern _seed
.extern _printf
; ----------------------------------------------------------------------
; Read-only constant pool referenced by the code through ldzi_*.
; Every entry is emitted one byte per .data line, most significant byte
; first (pointers are listed .byte3 down to .byte0).
; __pconsN are 4-byte addresses, __fcons* are 4-byte floats and
; __dcons* are 8-byte doubles. The decoded values below assume IEEE 754
; encoding and agree with the C expressions at the use sites.
; ----------------------------------------------------------------------
.segment fft_data_const,memtype=1,locinc=1,wordsize=8,rom
; address of w[]
__pcons13:
.data .byte3(_w)
.data .byte2(_w)
.data .byte1(_w)
.data .byte0(_w)
; address of z[] plus 4, i.e. the ip field of z[0]
__pcons12:
.data .byte3(_z+4)
.data .byte2(_z+4)
.data .byte1(_z+4)
.data .byte0(_z+4)
; address of z[]
__pcons11:
.data .byte3(_z)
.data .byte2(_z)
.data .byte1(_z)
.data .byte0(_z)
; address of e[]
__pcons10:
.data .byte3(_e)
.data .byte2(_e)
.data .byte1(_e)
.data .byte0(_e)
; double 0.0625 (0x3fb0000000000000), the sqrinv argument to Fft
__dcons9:
.data 0x3f
.data 0xb0
.data 0x0
.data 0x0
.data 0x0
.data 0x0
.data 0x0
.data 0x0
; double 10.0 (0x4024000000000000)
__dcons8:
.data 0x40
.data 0x24
.data 0x0
.data 0x0
.data 0x0
.data 0x0
.data 0x0
.data 0x0
; double 20.0 (0x4034000000000000)
__dcons7:
.data 0x40
.data 0x34
.data 0x0
.data 0x0
.data 0x0
.data 0x0
.data 0x0
.data 0x0
; float 0.0
__fconsf0:
.data 0x0
.data 0x0
.data 0x0
.data 0x0
; float 2.0 (0x40000000)
__fconsf2:
.data 0x40
.data 0x0
.data 0x0
.data 0x0
; float 4.0 (0x40800000), initial divisor in Exptab
__fcons6:
.data 0x40
.data 0x80
.data 0x0
.data 0x0
; float for theta = 3.1415926536 in Exptab, stored as 0x40490fda
; NOTE(review): the nearest single-precision value to 3.1415926536 is
; 0x40490fdb; the stored pattern looks truncated rather than rounded --
; confirm whether this is the compiler's intended conversion.
__fcons5:
.data 0x40
.data 0x49
.data 0xf
.data 0xda
; float -1.0 (0xbf800000)
__fcons4:
.data 0xbf
.data 0x80
.data 0x0
.data 0x0
; double 8192.0 (0x40c0000000000000), divisor in Uniform11
__dcons3:
.data 0x40
.data 0xc0
.data 0x0
.data 0x0
.data 0x0
.data 0x0
.data 0x0
.data 0x0
; address of the printf format string fft_V3
__pcons2:
.data .byte3(fft_V3)
.data .byte2(fft_V3)
.data .byte1(fft_V3)
.data .byte0(fft_V3)
; address of the newline string fft_V2
__pcons1:
.data .byte3(fft_V2)
.data .byte2(fft_V2)
.data .byte1(fft_V2)
.data .byte0(fft_V2)
; float 1.0 (0x3f800000)
__fconsf1:
.data 0x3f
.data 0x80
.data 0x0
.data 0x0
; ----------------------------------------------------------------------
; NUL-terminated string literals passed to printf by Printcomplex,
; one character code per .data line.
; ----------------------------------------------------------------------
.segment fft_istrings_const,memtype=1,locinc=1,wordsize=8,rom
; "\n"
fft_V2:
.data 0xa
.data 0x0
; two spaces followed by "%15.3e%15.3e"
fft_V3:
.data 0x20
.data 0x20
.data 0x25
.data 0x31
.data 0x35
.data 0x2e
.data 0x33
.data 0x65
.data 0x25
.data 0x31
.data 0x35
.data 0x2e
.data 0x33
.data 0x65
.data 0x0
; ----------------------------------------------------------------------
; Twelve-byte descriptor made of three 4-byte fields: zero, the address
; of fft_V19 (start of the fft_bss segment), and 0x00001428 = 5160,
; which equals the total of the .bss reservations in that segment
; (4 + 1040 + 4 + 2056 + 2056).
; NOTE(review): presumably consumed by startup code to initialise the
; bss region -- the consumer is not visible in this file; confirm.
; ----------------------------------------------------------------------
.segment _idesc,memtype=1,locinc=1,wordsize=8,rom
.data 0x0
.data 0x0
.data 0x0
.data 0x0
.data .byte3(fft_V19)
.data .byte2(fft_V19)
.data .byte1(fft_V19)
.data .byte0(fft_V19)
.data 0x0
.data 0x0
.data 0x14
.data 0x28
Back to Archelon's Home Page