Forth RV32I Assembler

Here’s an assembler for RV32I that I wrote in Forth. I find the definitions of the instructions and instruction types especially elegant, and I find it to be a great demonstration of how concise and powerful Forth can be.

All code in this post is licensed under the AGPLv3.

  \ encode stack values into proper instruction locations. all
  \ encoding sequences must begin with 'opcode'.
  \
  \ 'opcode' turns the top stack item into the partially built
  \ instruction word; every later encoder takes the raw value lying
  \ beneath that word and adds the relevant bits at their position.
  \
  \ Every literal below is decimal and 'decimal' is executed outside
  \ the definitions, so nothing here relies on 'hex'/'decimal' being
  \ immediate words and no encoder touches BASE when it runs.
  decimal

  \ mask with the low 'width' bits set
  : lowmask   ( width -- mask )  1 swap lshift 1- ;

  \ the 'width' bits of x starting at bit 'lo', right-justified
  : bitslice  ( x lo width -- u )  lowmask >r rshift r> and ;

  \ add bits x[lo+width-1:lo] to acc, placed at bit position 'pos'
  : +slice    ( x acc lo width pos -- acc' )
    >r >r >r  swap  r> r> bitslice  r> lshift  + ;

  \                                      lo width pos
  : opcode         ( op -- acc )          0   7     bitslice ;
  : funct3         ( f3 acc -- acc' )     0   3  12 +slice ;
  : funct7         ( f7 acc -- acc' )     0   7  25 +slice ;
  : i-immed        ( imm acc -- acc' )    0  12  20 +slice ;
  : i-immed-shamt  ( shamt acc -- acc' )  0   5  20 +slice ;
  : u-immed        ( imm acc -- acc' )    0  20  12 +slice ;

  \ the split immediates read the same value several times; 'over swap'
  \ leaves a copy of it beneath acc for the next '+slice'.
  : s-immed        ( imm acc -- acc' )
    over swap   5   7  25 +slice     \ imm[11:5]  -> bits 31:25
                0   5   7 +slice ;   \ imm[4:0]   -> bits 11:7
  : j-immed        ( imm acc -- acc' )
    over swap  20   1  31 +slice     \ imm[20]    -> bit  31
    over swap  12   8  12 +slice     \ imm[19:12] -> bits 19:12
    over swap  11   1  20 +slice     \ imm[11]    -> bit  20
                1  10  21 +slice ;   \ imm[10:1]  -> bits 30:21
  : b-immed        ( imm acc -- acc' )
    over swap  12   1  31 +slice     \ imm[12]    -> bit  31
    over swap  11   1   7 +slice     \ imm[11]    -> bit  7
    over swap   5   6  25 +slice     \ imm[10:5]  -> bits 30:25
                1   4   8 +slice ;   \ imm[4:1]   -> bits 11:8

  : rd             ( reg acc -- acc' )    0   5   7 +slice ;
  : rs1            ( reg acc -- acc' )    0   5  15 +slice ;
  : rs2            ( reg acc -- acc' )    0   5  20 +slice ;

 \ instruction types. all instruction values should be pushed on the
 \ stack with the opcode last before calling. each word folds its
 \ operands into one instruction word and appends it with ','.
 : r-type        ( rd rs1 rs2 funct7 funct3 opcode -- )
   opcode funct3 funct7                rs2 rs1 rd  , ;

 : i-type        ( rd rs1 imm funct3 opcode -- )
   opcode funct3         i-immed           rs1 rd  , ;

 : i-type-shamt  ( rd rs1 shamt funct7 funct3 opcode -- )
   opcode funct3 funct7  i-immed-shamt     rs1 rd  , ;

 : s-type        ( rs1 rs2 imm funct3 opcode -- )
   opcode funct3         s-immed       rs2 rs1     , ;

 : b-type        ( rs1 rs2 imm funct3 opcode -- )
   opcode funct3         b-immed       rs2 rs1     , ;

 : u-type        ( rd imm opcode -- )
   opcode                u-immed               rd  , ;

 : j-type        ( rd imm opcode -- )
   opcode                j-immed               rd  , ;

 \ instructions. these are just simple encodings, no assembler
 \ niceties yet. each word pushes its fixed funct7/funct3/opcode values
 \ on top of the operands already on the stack and hands everything to
 \ the matching instruction-type word.
 \
 \ the table is written in hex. BASE is switched once, outside the
 \ definitions, so the numbers are read as hex at compile time on any
 \ system and running an instruction word never changes BASE.
 hex
 \             funct7  funct3  opcode  encoding
 :   addi,              0      13  i-type       ;
 :   andi,              7      13  i-type       ;
 :    ori,              6      13  i-type       ;
 :   xori,              4      13  i-type       ;
 :   slli,     00       1      13  i-type-shamt ;
 :   srli,     00       5      13  i-type-shamt ;
 :   srai,     20       5      13  i-type-shamt ;
 :   slti,              2      13  i-type       ;
 :  sltiu,              3      13  i-type       ;
 :    lui,                     37  u-type       ;
 :  auipc,                     17  u-type       ;
 :    add,     00       0      33  r-type       ;
 :    sub,     20       0      33  r-type       ;
 :    and,     00       7      33  r-type       ;
 :     or,     00       6      33  r-type       ;
 :    xor,     00       4      33  r-type       ;
 :    sll,     00       1      33  r-type       ;
 :    srl,     00       5      33  r-type       ;
 :    sra,     20       5      33  r-type       ;
 :    slt,     00       2      33  r-type       ;
 :   sltu,     00       3      33  r-type       ;
 :    jal,                     6F  j-type       ;
 :   jalr,              0      67  i-type       ;
 :    beq,              0      63  b-type       ;
 :    bne,              1      63  b-type       ;
 :    blt,              4      63  b-type       ;
 :   bltu,              6      63  b-type       ;
 :    bge,              5      63  b-type       ;
 :   bgeu,              7      63  b-type       ;
 :     lw,              2      03  i-type       ;
 :     lh,              1      03  i-type       ;
 :    lhu,              5      03  i-type       ;
 :     lb,              0      03  i-type       ;
 :    lbu,              4      03  i-type       ; \ RV32I load that was missing
 :     sw,              2      23  s-type       ;
 :     sh,              1      23  s-type       ;
 :     sb,              0      23  s-type       ;
 :  fence,              0      0F  i-type       ;
 :  ecall,              0      73  i-type       ;
 : ebreak,              0      73  i-type       ;
 decimal

 \ some instructions, now with nicer usage. each word below reuses the
 \ name of the raw encoding it wraps; inside the new definition that
 \ name still refers to the earlier one.

 \ exchange the two items lying beneath the top of the stack
 : swap-under  ( a b c -- b a c )  >r swap r> ;

 : sw,  ( rs2 rs1 imm -- )  swap-under sw, ;   \ usage: rs2 rs1 imm sw,
 : sh,  ( rs2 rs1 imm -- )  swap-under sh, ;   \ usage: rs2 rs1 imm sh,
 : sb,  ( rs2 rs1 imm -- )  swap-under sb, ;   \ usage: rs2 rs1 imm sb,

 : ecall,   ( -- )      0 0 0      ecall, ;    \ usage: ecall,
 : ebreak,  ( -- )      0 0 1     ebreak, ;    \ usage: ebreak,
 : fence,   ( imm -- )  0 0 rot    fence, ;    \ usage: imm fence,

 \ registers: x0 .. x31 simply name their own register number
 decimal
  0 constant x0     16 constant x16
  1 constant x1     17 constant x17
  2 constant x2     18 constant x18
  3 constant x3     19 constant x19
  4 constant x4     20 constant x20
  5 constant x5     21 constant x21
  6 constant x6     22 constant x22
  7 constant x7     23 constant x23
  8 constant x8     24 constant x24
  9 constant x9     25 constant x25
 10 constant x10    26 constant x26
 11 constant x11    27 constant x27
 12 constant x12    28 constant x28
 13 constant x13    29 constant x29
 14 constant x14    30 constant x30
 15 constant x15    31 constant x31

 \ registers (calling convention)
 x0  constant zero   \ zero constant
 x1  constant ra     \ return address
 x2  constant sp     \ stack pointer
 x3  constant gp     \ global pointer
 x4  constant tp     \ thread pointer
 x8  constant fp     \ frame pointer (the same register as s0)

 \ function arguments / return values (a0, a1)
 x10 constant a0
 x11 constant a1
 x12 constant a2
 x13 constant a3
 x14 constant a4
 x15 constant a5
 x16 constant a6
 x17 constant a7

 \ saved registers
 x8  constant s0
 x9  constant s1
 x18 constant s2
 x19 constant s3
 x20 constant s4
 x21 constant s5
 x22 constant s6
 x23 constant s7
 x24 constant s8
 x25 constant s9
 x26 constant s10
 x27 constant s11

 \ temporaries
 x5  constant t0
 x6  constant t1
 x7  constant t2
 x28 constant t3
 x29 constant t4
 x30 constant t5
 x31 constant t6

And here are some tests that verify the instruction encodings are generated correctly:

 \ undoes the last ',' and leaves the cell it had stored, so each test
 \ can compare the generated instruction word
 : undo,  ( -- x )  -1 cells allot  here @ ;

 \ every literal in the tests is hex; BASE is left as hex afterwards
 hex

 \ register-immediate arithmetic and shifts
 t{ a0 a1   FF   addi, undo, -> 0FF58513 }t
 t{ a0 a1   FF   andi, undo, -> 0FF5F513 }t
 t{ a0 a1   FF    ori, undo, -> 0FF5E513 }t
 t{ a0 a1   FF   xori, undo, -> 0FF5C513 }t
 t{ a0 a1    F   slli, undo, -> 00F59513 }t
 t{ a0 a1    F   srli, undo, -> 00F5D513 }t
 t{ a0 a1    F   srai, undo, -> 40F5D513 }t

 \ upper immediates and set-less-than
 t{ t0    FFFF    lui, undo, -> 0FFFF2B7 }t
 t{ t0    FFFF  auipc, undo, -> 0FFFF297 }t
 t{ a0 a1   FF   slti, undo, -> 0FF5A513 }t
 t{ a0 a1   FF  sltiu, undo, -> 0FF5B513 }t

 \ register-register
 t{ a0 a1 a2      add, undo, -> 00C58533 }t
 t{ a0 a1 a2      sub, undo, -> 40C58533 }t
 t{ a0 a1 a2      and, undo, -> 00C5F533 }t
 t{ a0 a1 a2       or, undo, -> 00C5E533 }t
 t{ a0 a1 a2      xor, undo, -> 00C5C533 }t
 t{ a0 a1 a2      sll, undo, -> 00C59533 }t
 t{ a0 a1 a2      srl, undo, -> 00C5D533 }t
 t{ a0 a1 a2      sra, undo, -> 40C5D533 }t
 t{ a0 a1 a2      slt, undo, -> 00C5A533 }t
 t{ a0 a1 a2     sltu, undo, -> 00C5B533 }t

 \ jumps and branches
 t{ ra    FFFF    jal, undo, -> 7FF0F0EF }t
 t{ ra a0   FF   jalr, undo, -> 0FF500E7 }t
 t{ a0 a1    F    beq, undo, -> 00B50763 }t
 t{ a0 a1    F    bne, undo, -> 00B51763 }t
 t{ a0 a1    F    blt, undo, -> 00B54763 }t
 t{ a0 a1    F   bltu, undo, -> 00B56763 }t
 t{ a0 a1    F    bge, undo, -> 00B55763 }t
 t{ a0 a1    F   bgeu, undo, -> 00B57763 }t

 \ loads and stores
 t{ a0 a1    F     lw, undo, -> 00F5A503 }t
 t{ a0 a1    F     lh, undo, -> 00F59503 }t
 t{ a0 a1    F    lhu, undo, -> 00F5D503 }t
 t{ a0 a1    F     lb, undo, -> 00F58503 }t
 t{ a0 a1    F     sw, undo, -> 00A5A7A3 }t
 t{ a0 a1    F     sh, undo, -> 00A597A3 }t
 t{ a0 a1    F     sb, undo, -> 00A587A3 }t

 \ fence and environment calls
 t{        0FF  fence, undo, -> 0FF0000F }t
 t{             ecall, undo, -> 00000073 }t
 t{            ebreak, undo, -> 00100073 }t

#forth #assembly #risc-v

Reply to this post by email ↪