Skip to content

Commit 69defc9

Browse files
Refactor embedding into python tool, refresh documentation (#5)
1 parent 63dbe0e commit 69defc9

4 files changed

Lines changed: 113 additions & 146 deletions

File tree

Makefile

Lines changed: 9 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -3,20 +3,17 @@ CC = gcc
33
INC = -Iinclude
44
DIR = bin
55
BIN = $(DIR)/vbo
6+
PYTHON ?= python3
67

78
SRC_COMMON = src/instructions.c src/utils.c src/hardware.c
89
SRC_MAIN = src/main.c $(SRC_COMMON)
910

1011
FLAGS = -Wall -Wextra -Werror -g -std=c11 -pedantic
1112

12-
## Optional embedded image
13-
# e.g. make build EMBED_IMAGE=images/2048.obj
1413
EMBED_IMAGE ?=
1514
EMBED_OBJ :=
16-
EMBED_GLUE :=
1715
ifneq ($(strip $(EMBED_IMAGE)),)
18-
EMBED_OBJ := $(EMBED_IMAGE:.obj=.o)
19-
EMBED_GLUE := images/embed_glue.o
16+
EMBED_OBJ := images/vbo_image.o
2017
endif
2118

2219
TEST_DIR = test
@@ -25,36 +22,20 @@ TEST_DIR = test
2522

2623
all: build
2724

28-
build: $(EMBED_OBJ) $(EMBED_GLUE)
25+
build: $(EMBED_OBJ)
2926
mkdir -p $(DIR)
30-
$(CC) $(INC) $(SRC_MAIN) $(EMBED_OBJ) $(EMBED_GLUE) -o $(BIN) $(FLAGS)
31-
32-
# Convert raw image to linkable object (ld -b binary)
33-
%.o: %.obj
34-
ld -r -b binary -o $@ $<
35-
36-
# Glue exposes vbo_image_start/end for the chosen image
37-
images/embed_glue.o: $(EMBED_OBJ)
38-
@stem=_binary_$$(echo $(EMBED_IMAGE) | sed 's/[^A-Za-z0-9_]/_/g'); \
39-
mkdir -p images; \
40-
printf '%s\n' \
41-
'/* generated */' \
42-
"extern const unsigned char $${stem}_start[];" \
43-
"extern const unsigned char $${stem}_end[];" \
44-
"const unsigned char* vbo_image_start = $${stem}_start;" \
45-
"const unsigned char* vbo_image_end = $${stem}_end;" \
46-
> images/embed_glue.c; \
47-
$(CC) -c images/embed_glue.c -o images/embed_glue.o $(FLAGS)
27+
$(CC) $(INC) $(SRC_MAIN) $(EMBED_OBJ) -o $(BIN) $(FLAGS)
28+
29+
# Embed the image selected by EMBED_IMAGE into a linkable object.
#
# The object name is the same for every image, so file timestamps alone cannot
# tell that EMBED_IMAGE now points at a different (possibly older) file.  The
# FORCE prerequisite makes the object be regenerated whenever it is requested,
# so the binary never links a stale image.
images/vbo_image.o: $(EMBED_IMAGE) tools/embed_image.py FORCE
	mkdir -p $(@D)
	$(PYTHON) tools/embed_image.py --input "$(EMBED_IMAGE)" --output $@

# Empty always-out-of-date target used to force the rule above to run.
.PHONY: FORCE
FORCE:
4832

4933
clean:
5034
rm -f $(BIN)
5135
rm -f $(TEST_DIR)/test_utils
52-
rm -f images/embed_glue.c images/embed_glue.o
36+
rm -f images/vbo_image.o
5337

5438
distclean: clean
55-
ifneq ($(strip $(EMBED_OBJ)),)
56-
rm -f $(EMBED_OBJ)
57-
endif
5839

5940
test:
6041
$(CC) $(INC) $(TEST_DIR)/test_utils.c src/utils.c src/hardware.c -o $(TEST_DIR)/test_utils $(FLAGS)

docs/isa.md

Lines changed: 1 addition & 90 deletions
Original file line numberDiff line numberDiff line change
@@ -13,93 +13,4 @@ refer to `include/hardware.h`
1313

1414
## Scope
1515

16-
This document captures the current draft ISA for the VM and the stable parts of the design that are already present in the codebase. It intentionally leaves unspecified items blank until the implementation lands. The design targets an 18-bit, randomised ISA in later iterations while the first implementation runs a non-randomised, 16-bit equivalent.
17-
18-
## Word size
19-
20-
* Early implementation: 16-bit words
21-
* Planned implementation: 18-bit words (with a 5-bit opcode in the high-order bits)
22-
23-
## Register file
24-
25-
* General-purpose: `R0`–`R7` (8 total)
26-
* Special: `PC` (program counter), `COND` (condition flags)
27-
* Condition flags (one-hot): `FL_POS`, `FL_ZRO`, `FL_NEG`
28-
29-
## Memory model
30-
31-
* Flat 64K-word address space in early 16-bit design
32-
* `PC_START` set to `0x3000` for initial entry
33-
* Unified code/data; device/port mapping is TBD
34-
35-
## Instruction formats (planned 18-bit layouts)
36-
37-
The 18-bit ISA defines three canonical encodings. The early 16-bit ISA follows the same taxonomy but may use narrower immediates and offsets.
38-
39-
| Format | Bits | Fields |
40-
| --- | --- | --- |
41-
| R-type | 18 | `opcode[17:13] rd[12:10] rs[9:7] rt[6:4] flags[3:0]` |
42-
| I-type | 18 | `opcode[17:13] rd[12:10] rs[9:7] imm7[6:0]` (signed) |
43-
| M-type | 18 | `opcode[17:13] rd[12:10] offset10[9:0]` (signed) |
44-
45-
The 16-bit variant keeps the same conceptual fields but packs them within 16 bits. Final bit positions for the 16-bit encoding are TBD in this document and will be aligned with the implementation.
46-
47-
## Opcodes
48-
49-
The following opcodes are defined in the headers and constitute the core instruction set. Semantics follow conventional interpretations unless specified otherwise. Execution details for edge cases will be specified alongside the implementation.
50-
51-
| Mnemonic | Category | Brief semantics |
52-
| --- | --- | --- |
53-
| `NOP` | Control | No operation |
54-
| `HALT` | Control | Stop execution |
55-
| `ADD` | R-type | `rd = rs + rt`; updates `COND` |
56-
| `SUB` | R-type | `rd = rs - rt`; updates `COND` |
57-
| `AND` | R-type | `rd = rs & rt`; updates `COND` |
58-
| `OR` | R-type | `rd = rs | rt`; updates `COND` |
59-
| `XOR` | R-type | `rd = rs ^ rt`; updates `COND` |
60-
| `NOT` | R-type | `rd = ~rs`; updates `COND` |
61-
| `MOV` | R-type | `rd = rs`; may update `COND` (TBD) |
62-
| `LSH` | R-type | Logical left shift (operands/amount TBD) |
63-
| `RSH` | R-type | Logical right shift (operands/amount TBD) |
64-
| `CMP` | R-type | Compare `rs` and `rt`, set `COND` |
65-
| `ADDI` | I-type | `rd = rs + sext(imm)`; updates `COND` |
66-
| `ANDI` | I-type | `rd = rs & imm`; updates `COND` |
67-
| `ORI` | I-type | `rd = rs | imm`; updates `COND` |
68-
| `LDI` | M-type | Load immediate/addressing (exact mode TBD) |
69-
| `LD` | M-type | `rd = MEM[PC + off]` or base+off (TBD) |
70-
| `ST` | M-type | `MEM[PC + off] = rd` or base+off (TBD) |
71-
| `LDIND` | M-type | `rd = MEM[ MEM[base] + off ]` (TBD) |
72-
| `STIND` | M-type | `MEM[ MEM[base] + off ] = rs` (TBD) |
73-
| `JMP` | M-type | Unconditional jump (target encoding TBD) |
74-
| `JZ` | M-type | Jump if zero flag set |
75-
| `JNZ` | M-type | Jump if zero flag clear |
76-
| `CALL` | M-type | Call subroutine (linkage/stack TBD) |
77-
| `RET` | R-type/Implicit | Return from subroutine (exact source of target TBD) |
78-
| `PUSH` | R-type | Push register to stack (stack reg TBD) |
79-
| `POP` | R-type | Pop into register (stack reg TBD) |
80-
| `IN` | I-type | Read from device/port (map TBD) |
81-
| `OUT` | I-type | Write to device/port (map TBD) |
82-
| `TRAP` | I-type | Supervisor/service call (vector table TBD) |
83-
| `RAND` | R-type | Random number generator (source TBD) |
84-
| `SLEEP` | I-type | Delay/sleep for a duration (units TBD) |
85-
86-
Notes:
87-
88-
* Branch conditions use `COND` flags. Precise signedness rules for comparisons are TBD.
89-
* Shifts are logical in the current design; arithmetic right shift may be added later if required.
90-
91-
## Condition codes
92-
93-
Operations that write a result typically update `COND`:
94-
95-
* Result > 0 → `FL_POS`
96-
* Result == 0 → `FL_ZRO`
97-
* Result < 0 (interpreting the 16-bit result as signed) → `FL_NEG`
98-
99-
Exact update policy for non-arithmetic instructions (e.g., `MOV`, shifts, loads) will be specified with their implementations.
100-
101-
## Encoding constraints and compatibility
102-
103-
* The opcode field is planned to be 5 bits in the 18-bit design
104-
* The early 16-bit encoding mirrors the same opcode taxonomy to ease transition
105-
* Randomised opcode mapping is out of scope for the first implementation
16+
This document outlines the design specification for the custom ISA. This document is a stub.

docs/vm.md

Lines changed: 5 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ refer to `include/hardware.h`
1212

1313
## Scope
1414

15-
This document describes the VM that executes the virtualised program. It covers the current 16-bit early implementation and highlights planned shifts to an 18-bit, randomised ISA. It intentionally omits unfinished or unspecified details.
15+
This document describes the VM that executes the virtualised program. It covers the current 16-bit early implementation of the VM interpreter. It intentionally omits unfinished or unspecified details.
1616

1717
## Architecture overview
1818

@@ -22,6 +22,8 @@ This document describes the VM that executes the virtualised program. It covers
2222
* Fetch–decode–execute loop drives execution
2323
* Image files are loaded into memory before execution begins
2424

25+
This architecture is intended to align with the design of the LC-3.
26+
2527
## Registers
2628

2729
| Name | Width | Purpose |
@@ -49,39 +51,14 @@ This document describes the VM that executes the virtualised program. It covers
4951
| --- | --- |
5052
| Address space | 0x0000 to 0xFFFF |
5153
| Capacity | 65,536 words |
52-
| Word width | 16 bits (early implementation) |
54+
| Word width | 16 bits |
5355
| Layout | Unified code and data |
5456

5557
`PC` is initialised to `PC_START` (currently `0x3000`). This leaves lower memory available for images, vectors, and future system structures. Exact segmenting is to be defined.
5658

5759
## Program loading
5860

59-
* Command-line accepts one or more image files
60-
* Each image is validated and loaded into memory
61-
* On failure to load, execution aborts
62-
63-
Image format and relocation rules are to be defined. The loader interface exists; the on-disk format is not yet documented here.
64-
65-
## Execution model
66-
67-
* Initialise arguments and memory
68-
* Set `COND` to `FL_ZRO`
69-
* Set `PC` to `PC_START`
70-
* Main loop repeats while running
71-
* Fetch: read instruction word at `PC`, then increment `PC`
72-
* Decode: extract opcode from the high-order bits
73-
* Execute: dispatch to the operation handler
74-
* Update flags and registers as specified by the operation
75-
76-
At present, `HALT` terminates the loop. Additional operations are defined by the ISA but may be implemented incrementally.
77-
78-
## Instruction width and encoding
79-
80-
* Early implementation uses 16-bit instruction words
81-
* Planned implementation uses 18-bit instruction words with a 5-bit opcode field and structured operand fields
82-
* To maintain forward compatibility, the VM structure and instruction taxonomy align with the 18-bit design even when running 16-bit words
83-
84-
Exact 16-bit field layouts are intentionally not fixed in this document. See the ISA document for the intended 18-bit layouts.
61+
Images are embedded into the VM binary at build time by an image-embedding tool (`tools/embed_image.py`). The tool creates a relocatable object that contains the image as a byte array and exports symbols for the image start and end; the C code accesses the image through these two pointers.
8562

8663
## I/O
8764

tools/embed_image.py

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
#!/usr/bin/env python3
2+
"""
3+
Embed a binary payload as a relocatable object that exports:
4+
- vbo_image_start (const unsigned char*)
5+
- vbo_image_end (const unsigned char*)
6+
7+
Usage:
8+
embed_image.py --input path/to/image.obj --output images/vbo_image.o
9+
10+
The resulting object can be linked into the final binary. main.c declares the
11+
symbols as weak pointers, so linking this object makes the embedded image usable
12+
without glue files.
13+
"""
14+
15+
import argparse
16+
import os
17+
import shlex
18+
import subprocess
19+
import sys
20+
import tempfile
21+
22+
# Assembly source for the embedded image; it is run through the C preprocessor
# (see the "assembler-with-cpp" invocation in main()) and filled in with
# str.format(), so it must not contain braces other than {input_path}.
#
# Layout:
#   * .rodata.vbo_payload     - the raw image bytes (vbo_payload_start/_end).
#   * .data.rel.ro.vbo_ptrs   - the exported pointers vbo_image_start and
#     vbo_image_end.  They hold absolute addresses, so they live in a
#     relocatable read-only data section rather than plain .rodata; this keeps
#     position-independent links free of text relocations.  The pointers are
#     aligned to the pointer size.
#   * .note.GNU-stack         - marks the object as not needing an executable
#     stack, which the linker otherwise assumes for hand-written assembly.
ASM_TEMPLATE = r"""
    .section .rodata.vbo_payload, "a", @progbits
    .globl vbo_payload_start
    .type vbo_payload_start, @object
vbo_payload_start:
    .incbin "{input_path}"

    .globl vbo_payload_end
    .type vbo_payload_end, @object
vbo_payload_end:
    .byte 0

    .size vbo_payload_start, vbo_payload_end - vbo_payload_start

#if __SIZEOF_POINTER__ == 8
#define VBO_PTR .quad
#else
#define VBO_PTR .long
#endif

    .section .data.rel.ro.vbo_ptrs, "aw", @progbits
    .balign __SIZEOF_POINTER__
    .globl vbo_image_start
    .type vbo_image_start, @object
vbo_image_start:
    VBO_PTR vbo_payload_start
    .size vbo_image_start, __SIZEOF_POINTER__

    .balign __SIZEOF_POINTER__
    .globl vbo_image_end
    .type vbo_image_end, @object
vbo_image_end:
    VBO_PTR vbo_payload_end
    .size vbo_image_end, __SIZEOF_POINTER__

    .section .note.GNU-stack, "", @progbits
"""
61+
62+
def run(cmd, cwd=None):
    """Run an external command and report the command line if it fails.

    Args:
        cmd: Argument list handed to ``subprocess.check_call``.
        cwd: Optional working directory for the child process.

    Raises:
        subprocess.CalledProcessError: The command exited with a non-zero status.
        OSError: The command could not be started (for example, the
            executable is not installed).
    """
    try:
        subprocess.check_call(cmd, cwd=cwd)
    except (subprocess.CalledProcessError, OSError):
        # Show the shell-quoted command for both a failing exit status and a
        # failure to launch, then let the caller decide how to react.
        print(f"Command failed: {' '.join(map(shlex.quote, cmd))}", file=sys.stderr)
        raise
68+
69+
70+
def main():
    """Embed the file named by --input into the object file named by --output.

    The assembly template is written to a temporary directory and assembled
    with the compiler driver named by the ``CC`` environment variable, falling
    back to ``gcc`` when ``CC`` is unset or empty.

    Returns:
        0 on success, 2 when the input file does not exist, 1 when the
        assembler cannot be started or exits with an error.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--input', required=True, help='Path to payload binary to embed')
    parser.add_argument('--output', required=True, help='Path to output object file (.o)')
    args = parser.parse_args()

    inp = os.path.abspath(args.input)
    out = os.path.abspath(args.output)

    if not os.path.isfile(inp):
        print(f"Input file not found: {inp}", file=sys.stderr)
        return 2

    os.makedirs(os.path.dirname(out), exist_ok=True)

    # Escape backslashes and double quotes so the path is a valid string
    # literal for the .incbin directive.
    incbin_path = inp.replace('\\', '\\\\').replace('"', '\\"')

    # Use the same compiler driver as the rest of the build when one is
    # exported (e.g. `make CC=clang`); CC may carry extra words such as a
    # wrapper, hence the shell-style split.
    compiler = shlex.split(os.environ.get('CC', '')) or ['gcc']

    with tempfile.TemporaryDirectory() as td:
        asm_path = os.path.join(td, 'vbo_embed.S')
        with open(asm_path, 'w') as f:
            f.write(ASM_TEMPLATE.format(input_path=incbin_path))

        # Assemble via the compiler driver for portability
        cmd = compiler + ['-x', 'assembler-with-cpp', '-c', asm_path, '-o', out, '-nostdlib']
        try:
            run(cmd)
        except subprocess.CalledProcessError:
            # run() has already reported the failing command line.
            return 1
        except OSError as e:
            print(f"Cannot run assembler {compiler[0]!r}: {e}", file=sys.stderr)
            return 1

    return 0
95+
96+
97+
if __name__ == '__main__':
98+
sys.exit(main())

0 commit comments

Comments
 (0)