Last Revised 01/07/19
Here is a very simple Compiler Information File (CIF), a C program, and the resulting assembly code generated by the compiler under control of the CIF file.
#
# This is the CIF file for sample.c, which
# implements simple assignment, addition, and
# subtraction for integers on a hypothetical
# three-address architecture.
#
# Copyright © 1994,1997 by Archelon Inc.
#
# R E G I S T E R   S E T S
#
mau := 8; /* byte addressable memory */
#
# 32 general purpose registers which can be used for any type
# and for any operand
#
regset := R[32] width=32
          optype=int,ptr,ptr2,float,double,longdbl,codeptr
          regtype=char,short,int,ptr,ptr2,codeptr,long,float,
                  double,longdbl;
stkptr  := R[31];     # define the stack pointer register
scratch := R[24-27];  # reserve some scratch (global temporary) registers
argreg  := R[24-25];  # allow arguments to be passed in some registers
color   := R[0-29];   # define which registers will be controlled by
                      # the register allocator.
#
# O P E R A N D S
#
# this section describes and names the various operands which
# you can use in assembly language instructions.
#
operand code_addr codeptr;            # pointer to code memory
operand data_addr ptr;                # pointer to data memory
operand const16 sconst -32768 32767;  # 16 bit signed constant
operand gp_reg reg R;                 # general purpose register
#
# an "amode" is an address mode. This is a memory reference.
# the "ri_addr" amode forms an address by summing a register and a constant.
# the "dir_addr" amode forms an address by direct address.
#
operand ri_addr amode R+const16 format "%B,%O";
operand dir_addr amode data_addr format "%O";
#
# F O R M A T S
#
# this section describes and names the various instruction formats
# supported by the hardware. You define a format as using zero or more
# operands.
#   "src"  means the only source operand
#   "lsrc" means the left source operand
#   "rsrc" means the right source operand
#   "dest" means the destination operand
#
# Although this example shows only one operand for each source or
# destination, you can use a sequence of several operands
# separated by or-bars.
#
format mem_load_ri   src ri_addr  dest gp_reg;
format mem_load_dir  src dir_addr dest gp_reg;
format mem_store_rri src gp_reg   dest ri_addr;
format mem_store_dir src gp_reg   dest dir_addr;
format binary_rrr    lsrc gp_reg rsrc gp_reg  dest gp_reg;
format binary_rir    lsrc gp_reg rsrc const16 dest gp_reg;
format unary_rr      src gp_reg  dest gp_reg;
format unary_ir      src const16 dest gp_reg;
format noargs;
format branch        dest code_addr;
#
# O P C O D E S
#
# In this section, you define the op codes of your machine
# As part of the definition, you also provide the format which it uses.
# There are also some special tags which you must provide
# for certain instructions. For instance, the last item on
# this list is tagged " : call ". This means that the "call"
# instruction is in fact a call to another function and
# that the instruction will alter the flow of control. You must
# provide this information so that the compiler can determine
# what instructions alter flow of control when doing flow analysis.
#
opcode ldri  mem_load_ri;
opcode ld    mem_load_dir;
opcode strri mem_store_rri;
opcode st    mem_store_dir;
opcode add   binary_rrr;
opcode addi  binary_rir;
opcode sub   binary_rrr;
opcode subi  binary_rir;
opcode mov  : move unary_rr;
opcode movi unary_ir;
opcode ret  : ret  noargs;
opcode jmp  : jump branch;
opcode call : call branch;
#
# C O D E   T A B L E S
#
# a "code" section generates code for the intermediate language operator
# invoked from an "oper" directive.
#
code binary(opcode)   # code table for add and subtract
    ? matches( $right, const16 )   # predicate test: true if rhs is 16 bit constant
      {
         # if the right hand side is a 16 bit constant,
         # then use either the add immediate or subtract immediate opcode
         # (the "i" suffix is appended to the opcode passed in, giving
         # "addi" or "subi")
         opcode|"i" $left,$right,$dest;
      }
      {
         # the default action is to use the non-immediate opcode.
         # the left and right operands will be loaded into registers,
         # if they are not there already.
         opcode $left,$right,$dest;
      }

# the ADD operators for signed short and for unsigned short
# invoke the "binary" code table with an argument of "add";
# the SUB operators invoke it with an argument of "sub".
oper ADD : sshort binary( "add" );
oper ADD : ushort binary( "add" );
oper SUB : sshort binary( "sub" );
oper SUB : ushort binary( "sub" );

# code tables for simple assignment
# the compiler selects the first predicate which matches.
# If none match, it will use the case without a predicate.
code asgn
    ? in_reg_set($left,R) && matches($right,const16)
      {
         # left hand side is in a register
         # and right hand side is a 16 bit constant
         movi $right,$left;
         $ASGN $nodest,$dest,$left;
      }
    ? in_reg_set($left,R) && mem_ref($right)
      {
         # left hand side is in a register
         # and right hand side is a memory reference
         $LOAD $left,$right;
         $ASGN $nodest,$dest,$left;
      }
    ? in_reg_set($left,R)
      {
         # left hand side is in a register
         mov $right,$left;
         $ASGN $nodest,$dest,$left;
      }
    ? matches($left,dir_addr)
      {
         # left hand side is directly addressable
         st $right,$left;
         $ASGN $nodest,$dest,$right;
      }
      {
         # default - compiler will put rhs in reg, put
         # address of place to store in another reg.
         strri $right,$left;
         $ASGN $nodest,$dest,$right;
      }

# operator nodes for signed short and unsigned short simple assignment.
oper ASGN : sshort asgn;
oper ASGN : ushort asgn;

# code table to load something into a register.
code load
    ? in_reg_set($left,R)
      {
         # lhs in register, use move register to register instruction
         mov $left,$dest;
      }
    ? matches($left,const16)
      {
         # lhs is constant, use move immediate to register instruction
         movi $left,$dest;
      }
    ? matches($left,dir_addr)
      {
         # lhs is direct address, use load direct instruction
         ld $left,$dest;
      }
      {
         # default, force use of load indirect using register+offset address mode.
         ldri $left,$dest;
      }

oper LOAD : sshort load;
oper LOAD : ushort load;
oper LOAD : ptr    load;

# code table for instructions that take no operands.
code op_noargs(opcode)
      {
         opcode;
      }

oper RET : void op_noargs("ret");   # return instruction.

code branch_code(opcode)
      {
         # branch - lhs is the label to which to branch.
         opcode $left;
      }

oper CALLDIR : void branch_code("call");   # call using a direct address
oper JUMP    : void branch_code("jmp");    # jump to label
/*
 * sample.c - the C program compiled under control of the CIF file.
 * It exercises integer addition, a direct function call, and a
 * simple assignment to a global.
 */

/* Global that receives the value returned by f() in main(). */
int c;

/* Return the sum of the two arguments. */
int
f( int a, int b )
{
    return a + b;
}

/* Entry point: call f( 2, 3 ) and store the result in the global c. */
void
main()
{
    c = f( 2, 3 );
}
;
; This is the code generated by the compiler,
; using the code tables in the CIF file.
;
; Operand order follows the CIF formats: sources first, destination last.
;
; int c;
; f( int a, int b )
        .segment sample_code,locinc=1
        .asm
f_LL1=0
        .endasm
        .global _f
_f:
; ENTRY
; {
; a at argloc 0 (0x0) in reg R0p size 2
; b at argloc 0 (0x0) in reg R1p size 2
; return a + b;
        add     R24,R25,R24             ; R24 = R24 + R25 (R24-R25 are the CIF argreg set)
; EXIT
        ret
; }
; void
; main()
        .segment sample_data,memtype=1,wordsize=8,locinc=1
        .global _C_stack_
_C_stack_:
        .bss    500                     ; 500 units reserved for the C stack
        .segment sample_code,locinc=1
        .asm
main_LL1=0
        .endasm
        .global _main
_main:
        ld      _.pcons1,R31            ; R31 (CIF stkptr) = value stored at _.pcons1
; ENTRY
; {
; c = f( 2, 3 );
        movi    0x2,R24                 ; first argument
        movi    0x3,R25                 ; second argument
        call    _f
        st      R24,_c                  ; c = value left in R24 by _f
; EXIT
        ret
; }
        .segment sample_data,memtype=1,wordsize=8,locinc=1
        .global _c
_c:
        .zero   2                       ; c size 2
; _.pcons1 holds the address _C_stack_ + 499 (last unit of the stack area),
; emitted as two 8-bit pieces: presumably bits 8-15 first, then bits 0-7.
_.pcons1:
        .data   .bits(8,15,_C_stack_ + 499)
        .data   .bits(0,7, _C_stack_ + 499)