Last Revised 01/07/19
Here is a very simple Compiler Information File (CIF), a C program, and the resulting assembly code generated by the compiler under control of the CIF file.
#
# This is the CIF file for sample.c, which
# implements simple assignment, addition, and
# subtraction for integers on a hypothetical
# three-address architecture.
#
# Copyright © 1994,1997 by Archelon Inc.
#
# R E G I S T E R S E T S
#
# Register and memory model for the target. The generated listing for the
# sample program uses R24/R25 for arguments and the return value and loads
# R31 at the start of main, matching the argreg and stkptr lines below.
# NOTE(review): "mau" presumably abbreviates "minimum addressable unit" -- confirm.
mau := 8; /* byte addressable memory */
#
# 32 general purpose registers which can be used for any type
# and for any operand
#
# One register set named R with 32 members, declared with width=32.
# optype lists the operand kinds and regtype the value types allowed in R.
regset := R[32] width=32
optype=int,ptr,ptr2,float,double,longdbl,codeptr
regtype=char,short,int,ptr,ptr2,codeptr,long,float,
double,longdbl;
stkptr := R[31]; # define the stack pointer register
# The argument registers R[24-25] are a subset of the scratch registers R[24-27].
scratch := R[24-27]; # reserve some scratch (global temporary) registers
argreg := R[24-25]; # allow arguments to be passed in some registers
color := R[0-29]; # define which registers will be controlled by
# the register allocator. R[30] and R[31] (the stack pointer) fall outside this range.
#
# O P E R A N D S
#
# this section describes and names the various operands which
# you can use in assembly language instructions.
#
# Each declaration names an operand and gives its kind; these names are the
# building blocks referenced by the format declarations and by the
# matches(...) predicates in the code tables.
operand code_addr codeptr; # pointer to code memory
operand data_addr ptr; # pointer to data memory
operand const16 sconst -32768 32767; # 16 bit signed constant
operand gp_reg reg R; # general purpose register
#
# an "amode" is an address mode. This is a memory reference.
# the "ri_addr" amode forms an address by summing a register and a constant.
# the "dir_addr" amode forms an address by direct address.
#
# NOTE(review): %B and %O in the format strings presumably stand for the base
# register and the offset/address when the operand is printed -- confirm
# against the CIF reference.
operand ri_addr amode R+const16 format "%B,%O";
operand dir_addr amode data_addr format "%O";
#
# F O R M A T S
#
# this section describes and names the various instruction formats
# supported by the hardware. You define a format as using zero or more
# operands.
# "src" means the only source operand
# "lsrc" means the left source operand
# "rsrc" means the right source operand
# "dest" means the destination operand
#
# Although this example shows only one operand for each source or
# destination, you can use a sequence of several operands
# separated by or-bars.
#
# Loads: memory source (register+constant or direct address), register destination.
format mem_load_ri src ri_addr dest gp_reg;
format mem_load_dir src dir_addr dest gp_reg;
# Stores: register source, memory destination.
format mem_store_rri src gp_reg dest ri_addr;
format mem_store_dir src gp_reg dest dir_addr;
# Two-source forms: register/register and register/16-bit-constant.
format binary_rrr lsrc gp_reg rsrc gp_reg dest gp_reg;
format binary_rir lsrc gp_reg rsrc const16 dest gp_reg;
# Single-source forms: register-to-register and constant-to-register.
format unary_rr src gp_reg dest gp_reg;
format unary_ir src const16 dest gp_reg;
# A format with zero operands (used by ret).
format noargs;
# One code-address operand, declared as the destination (used by jmp and call).
format branch dest code_addr;
#
# O P C O D E S
#
# In this section, you define the op codes of your machine
# As part of the definition, you also provide the format which it uses.
# There are also some special tags which you must provide
# for certain instructions. For instance, the last item on
# this list is tagged " : call ". This means that the "call"
# instruction is in fact a call to another function and
# that the instruction will alter the flow of control. You must
# provide this information so that the compiler can determine
# what instructions alter flow of control when doing flow analysis.
#
# Each line binds a mnemonic to one of the formats declared above. Where a
# tag is present it sits between the colon and the format name.
# Memory access: register+offset and direct-address loads and stores.
opcode ldri mem_load_ri;
opcode ld mem_load_dir;
opcode strri mem_store_rri;
opcode st mem_store_dir;
# Arithmetic: each operation has a register form and an "i" (immediate) form;
# the "binary" code table builds the immediate mnemonic by appending "i".
opcode add binary_rrr;
opcode addi binary_rir;
opcode sub binary_rrr;
opcode subi binary_rir;
# Register copy and constant load.
# NOTE(review): the "move" tag presumably identifies register-to-register
# copies to the compiler -- confirm against the CIF reference.
opcode mov : move unary_rr;
opcode movi unary_ir;
# Flow-of-control instructions carry the tags ret, jump and call so that the
# compiler's flow analysis knows they alter the flow of control.
opcode ret : ret noargs;
opcode jmp : jump branch;
opcode call : call branch;
#
# C O D E T A B L E S
#
# a "code" section generates code for the intermediate language operator
# invoked from an "oper" directive.
#
# "binary" takes the base mnemonic as its opcode parameter; the oper lines
# below pass "add" or "sub". Operands are emitted in the order left, right,
# destination, matching the binary_rrr and binary_rir formats.
code binary(opcode) # code table for add and subtract
? matches( $right, const16 ) # predicate test: true if rhs is 16 bit constant
{
# if the right hand side is a 16 bit constant,
# then use either the add immediate or subtract immediate opcode
# (the mnemonic is formed by appending "i" to the opcode parameter,
# giving addi or subi).
opcode|"i" $left,$right,$dest;
}
{
# the default action is to use the non-immediate opcode.
# the left and right operands will be loaded into registers,
# if they are not there already.
opcode $left,$right,$dest;
}
# the ADD operators for signed short and for unsigned short
# invoke the "binary" code table with an argument of "add".
# the SUB operators do the same with an argument of "sub".
# NOTE(review): only sshort and ushort are bound here; the generated listing
# reports the sample's int variables as "size 2", so int presumably maps to
# one of these types on this target -- confirm.
oper ADD : sshort binary( "add" );
oper ADD : ushort binary( "add" );
oper SUB : sshort binary( "sub" );
oper SUB : ushort binary( "sub" );
# code tables for simple assignment
# the compiler selects the first predicate which matches.
# If none match, it will use the case without a predicate.
# "asgn" generates code for simple assignment. $left is the assignment
# target and $right is the value being assigned. The cases are tried in
# order: register target with constant, register target with memory source,
# register target with any other source, directly addressable target, and
# finally the register+offset store as the default.
# NOTE(review): every case ends with a "$ASGN $nodest,..." line naming the
# operand that holds the assigned value afterwards ($left for register
# targets, $right for memory targets); presumably this tells the compiler
# where the value of the assignment expression lives -- confirm against the
# CIF reference.
# Every statement is terminated with ";" as in the other code tables.
code asgn
? in_reg_set($left,R) && matches($right,const16)
{
# left hand side is in a register
# and right hand side is a 16 bit constant
movi $right,$left;
$ASGN $nodest,$dest,$left;
}
? in_reg_set($left,R) && mem_ref($right)
{
# left hand side is in a register
# and right hand side is a memory reference
$LOAD $left,$right;
$ASGN $nodest,$dest,$left;
}
? in_reg_set($left,R)
{
# left hand side is in a register
mov $right,$left;
$ASGN $nodest,$dest,$left;
}
? matches($left,dir_addr)
{
# left hand side is directly addressable
st $right,$left;
$ASGN $nodest,$dest,$right;
}
{
# default - compiler will put rhs in reg, put
# address of place to store in another reg.
strri $right,$left;
$ASGN $nodest,$dest,$right;
}
# operator nodes for signed short and unsigned short simple assignment.
oper ASGN : sshort asgn;
oper ASGN : ushort asgn;
# code table to load something into a register.
# The value to load is $left and the target register is $dest. The cases are
# tried in order: register source, 16 bit constant, direct address, and
# finally the register+offset load as the default.
code load
? in_reg_set($left,R) {
# lhs in register, use move register to register instruction
mov $left,$dest;
}
? matches($left,const16) {
# lhs is constant, use move immediate to register instruction
movi $left,$dest;
}
? matches($left,dir_addr) {
# lhs is direct address, use load direct instruction
ld $left,$dest; }
{
# default, force use of load indirect using register+offset address mode.
ldri $left,$dest;
}
# LOAD is bound for signed short, unsigned short and pointer values.
oper LOAD : sshort load;
oper LOAD : ushort load;
oper LOAD : ptr load;
# code table for instructions that take no operands: emits the mnemonic
# passed in as the opcode parameter and nothing else.
code op_noargs(opcode) {
opcode;
}
oper RET : void op_noargs("ret"); # return instruction.
# code table for flow-of-control instructions that take a single code
# address: emits the mnemonic passed in as the opcode parameter followed by
# $left. The statement is terminated with ";" as in the other code tables.
code branch_code(opcode) {
# branch - lhs is the label to which to branch.
opcode $left;
}
oper CALLDIR : void branch_code("call"); # call using a direct address
oper JUMP : void branch_code("jmp"); # jump to label
/* sample.c: global that receives the result of f( 2, 3 ) in main. */
int c;
/* Return the sum of a and b. */
int
f( int a, int b )
{
return a + b;
}
/*
 * Call f( 2, 3 ) and store the result in the global c.
 * NOTE(review): main is declared void with an empty parameter list; the
 * generated listing echoes this declaration, so it is left as written.
 */
void
main()
{
c = f( 2, 3 );
}
;
; This is the code generated by the compiler,
; using the code tables in the CIF file.
;
; int c;
; f( int a, int b )
; Generated code for f. The function body is a single add followed by ret.
; NOTE(review): the purpose of the f_LL1 symbol set inside .asm/.endasm is
; not visible in this listing.
.segment sample_code,locinc=1
.asm
f_LL1=0
.endasm
.global _f
_f:
; ENTRY
; {
; a at argloc 0 (0x0) in reg R0p size 2
; b at argloc 0 (0x0) in reg R1p size 2
; NOTE(review): the two lines above name R0p/R1p, but the instruction below
; reads R24/R25, which are the argreg registers in the CIF and the registers
; main loads before the call -- confirm how R0p/R1p relate to R24/R25.
; return a + b;
; add uses the binary_rrr format (left source, right source, destination):
; R24 = R24 + R25. The sum stays in R24, which main stores to _c after the call.
add R24,R25,R24
; EXIT
ret
; }
; void
; main()
; Data segment reserving the C stack: .bss 500 at the label _C_stack_.
.segment sample_data,memtype=1,wordsize=8,locinc=1
.global _C_stack_
_C_stack_:
.bss 500
; Generated code for main.
.segment sample_code,locinc=1
.asm
main_LL1=0
.endasm
.global _main
_main:
; Load R31 (declared as stkptr in the CIF) from the constant at _.pcons1,
; which is defined in the data segment as _C_stack_ + 499.
ld _.pcons1,R31
; ENTRY
; {
; c = f( 2, 3 );
; Place the two arguments in R24 and R25 (movi: constant source, register
; destination), then call f.
movi 0x2,R24
movi 0x3,R25
call _f
; Store the value f left in R24 into the global c (st: register source,
; direct-address destination).
st R24,_c
; EXIT
ret
; }
; Data segment for the global c and the stack-pointer constant.
.segment sample_data,memtype=1,wordsize=8,locinc=1
.global _c
_c:
.zero 2
; c size 2
; Constant read by _main to load R31: the value _C_stack_ + 499, emitted as
; two .data items, bits 8-15 first and bits 0-7 second.
; NOTE(review): presumably .bits(lo,hi,expr) selects a bit field of expr,
; making this a 16-bit value stored high byte first -- confirm against the
; assembler manual.
_.pcons1:
.data .bits(8,15,_C_stack_ + 499)
.data .bits(0,7, _C_stack_ + 499)