diff --git a/Cargo.lock b/Cargo.lock index e4ce1de..073b40c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -58,6 +58,12 @@ dependencies = [ "windows-sys", ] +[[package]] +name = "anyhow" +version = "1.0.102" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c" + [[package]] name = "bnf" version = "0.6.0" @@ -78,6 +84,16 @@ version = "3.20.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5d20789868f4b01b2f2caec9f5c4e0213b41e3e5702a50157d699ae31ced2fcb" +[[package]] +name = "cc" +version = "1.2.56" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aebf35691d1bfb0ac386a69bac2fde4dd276fb618cf8bf4f5318fe285e821bb2" +dependencies = [ + "find-msvc-tools", + "shlex", +] + [[package]] name = "cfg-if" version = "1.0.4" @@ -130,12 +146,24 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" +[[package]] +name = "either" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" + [[package]] name = "equivalent" version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" +[[package]] +name = "find-msvc-tools" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" + [[package]] name = "foldhash" version = "0.2.0" @@ -173,6 +201,30 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" +[[package]] +name = "inkwell" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "1def4112dfb2ce2993db7027f7acdb43c1f4ee1c70a082a2eef306ed5d0df365" +dependencies = [ + "inkwell_internals", + "libc", + "llvm-sys", + "once_cell", + "thiserror", +] + +[[package]] +name = "inkwell_internals" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "63736175c9a30ea123f7018de9f26163e0b39cd6978990ae486b510c4f3bad69" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "is_terminal_polyfill" version = "1.70.2" @@ -195,12 +247,32 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + [[package]] name = "libc" version = "0.2.183" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b5b646652bf6661599e1da8901b3b9522896f01e736bad5f723fe7a3a27f899d" +[[package]] +name = "llvm-sys" +version = "211.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "108b3ad2b2eaf2a561fc74196273b20e3436e4a688b8b44e250d83974dc1b2e2" +dependencies = [ + "anyhow", + "cc", + "lazy_static", + "libc", + "regex-lite", + "semver", +] + [[package]] name = "memchr" version = "2.8.0" @@ -290,12 +362,24 @@ dependencies = [ "getrandom", ] +[[package]] +name = "regex-lite" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cab834c73d247e67f4fae452806d17d3c7501756d98c8808d7c9c7aa7d18f973" + +[[package]] +name = "rpgrt" +version = "0.1.0" + [[package]] name = "rust-langrpg" version = "0.1.0" dependencies = [ "bnf", "clap", + "either", + "inkwell", ] [[package]] @@ -304,6 +388,12 @@ version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" +[[package]] +name = "semver" +version = "1.0.27" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "d767eb0aabc880b29956c35734170f26ed551a859dbd361d140cdbeca61ab1e2" + [[package]] name = "serde" version = "1.0.228" @@ -347,6 +437,12 @@ dependencies = [ "zmij", ] +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + [[package]] name = "strsim" version = "0.11.1" @@ -364,6 +460,26 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "thiserror" +version = "2.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "2.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "unicode-ident" version = "1.0.24" diff --git a/Cargo.toml b/Cargo.toml index 88e3bcb..f59912f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,9 +1,24 @@ +[workspace] +members = [ + ".", + "rpgrt", +] +resolver = "2" + +# ───────────────────────────────────────────────────────────────────────────── +# Main compiler package +# ───────────────────────────────────────────────────────────────────────────── + [package] -name = "rust-langrpg" +name = "rust-langrpg" version = "0.1.0" edition = "2024" default-run = "rust-langrpg" +# ───────────────────────────────────────────────────────────────────────────── +# Binaries +# ───────────────────────────────────────────────────────────────────────────── + [[bin]] name = "rust-langrpg" path = "src/main.rs" @@ -12,6 +27,21 @@ path = "src/main.rs" name = "demo" path = "src/bin/demo.rs" +# ───────────────────────────────────────────────────────────────────────────── +# Library (rlib — used by the 
binaries and tests) +# ───────────────────────────────────────────────────────────────────────────── + +[lib] +name = "rust_langrpg" +path = "src/lib.rs" +crate-type = ["rlib"] + +# ───────────────────────────────────────────────────────────────────────────── +# Dependencies +# ───────────────────────────────────────────────────────────────────────────── + [dependencies] -bnf = "0.6" -clap = { version = "4", features = ["derive"] } +bnf = "0.6" +clap = { version = "4", features = ["derive"] } +either = "1" +inkwell = { version = "0.8", features = ["llvm21-1"] } diff --git a/rpgrt/Cargo.toml b/rpgrt/Cargo.toml new file mode 100644 index 0000000..eef150b --- /dev/null +++ b/rpgrt/Cargo.toml @@ -0,0 +1,21 @@ +[package] +name = "rpgrt" +version = "0.1.0" +edition = "2021" + +# ───────────────────────────────────────────────────────────────────────────── +# Build as a C-compatible shared library (librpgrt.so) so that RPG IV programs +# compiled by rust-langrpg can link against it at runtime. +# ───────────────────────────────────────────────────────────────────────────── + +[lib] +name = "rpgrt" +path = "src/lib.rs" +crate-type = ["cdylib", "rlib"] + +# cdylib → produces librpgrt.so (loaded by compiled RPG binaries) +# rlib → allows `cargo test` to run the unit tests in src/lib.rs + +[dependencies] +# No external dependencies — the runtime is intentionally minimal and relies +# only on the Rust standard library and libc (linked automatically). diff --git a/rpgrt/src/lib.rs b/rpgrt/src/lib.rs new file mode 100644 index 0000000..bc1c28e --- /dev/null +++ b/rpgrt/src/lib.rs @@ -0,0 +1,496 @@ +//! rpgrt.rs — RPG IV runtime library. +//! +//! This crate is compiled as a C-compatible shared library (`librpgrt.so`) that +//! RPG IV programs compiled by `rust-langrpg` link against at runtime. +//! +//! ## Exported symbols +//! +//! | Symbol | Signature | Description | +//! 
|---------------------|----------------------------------------|--------------------------------------| +//! | `rpg_dsply` | `(ptr: *const u8, len: i64)` | Display a fixed-length char field | +//! | `rpg_dsply_cstr` | `(ptr: *const c_char)` | Display a null-terminated C string | +//! | `rpg_dsply_i64` | `(n: i64)` | Display a signed 64-bit integer | +//! | `rpg_dsply_f64` | `(f: f64)` | Display a double-precision float | +//! | `rpg_halt` | `(code: i32)` | Abnormal program termination | +//! +//! ## Building +//! +//! The runtime is built automatically by `build.rs` as part of the normal +//! `cargo build` invocation. The resulting `librpgrt.so` is placed in the +//! Cargo output directory and the compiler binary links executables against it. +//! +//! To build it standalone: +//! +//! ```sh +//! rustc --edition 2024 --crate-type cdylib -o librpgrt.so src/bin/rpgrt.rs +//! ``` +//! +//! ## DSPLY semantics +//! +//! In a real IBM i system, `DSPLY` writes a message to the *program message +//! queue* (an interactive operator message queue). On a Linux host there is no +//! equivalent facility, so we write to **stdout**, mirroring the message format +//! IBM i uses: +//! +//! ```text +//! DSPLY Hello, World! +//! ``` +//! +//! The output is flushed immediately after every `DSPLY` call, matching the +//! interactive behaviour of the IBM i runtime. +//! +//! Trailing ASCII spaces (0x20) are stripped from fixed-length `CHAR` fields +//! before display, exactly as IBM i does. + +#![allow(clippy::missing_safety_doc)] + +use std::ffi::CStr; +use std::io::{self, Write}; +use std::slice; + +// ───────────────────────────────────────────────────────────────────────────── +// rpg_dsply — display a fixed-length character field +// ───────────────────────────────────────────────────────────────────────────── + +/// Display the first `len` bytes pointed to by `ptr`, trimming trailing spaces, +/// then print a newline and flush stdout. 
+/// +/// This is the primary entry point called by the LLVM-compiled RPG procedure +/// for `DSPLY variable_name;`. +/// +/// # Safety +/// +/// * `ptr` must be valid for at least `len` bytes. +/// * `len` must be ≥ 0. A negative `len` is silently treated as 0. +#[no_mangle] +pub unsafe extern "C" fn rpg_dsply(ptr: *const u8, len: i64) { + let bytes = if ptr.is_null() || len <= 0 { + b"" as &[u8] + } else { + unsafe { slice::from_raw_parts(ptr, len as usize) } + }; + + // Strip trailing spaces (IBM i CHAR fields are space-padded to their + // declared length). + let trimmed = rtrim_spaces(bytes); + + // Convert to a lossy UTF-8 string so non-ASCII EBCDIC-origin data at + // least renders something printable rather than crashing. + let text = String::from_utf8_lossy(trimmed); + + let stdout = io::stdout(); + let mut out = stdout.lock(); + // Mimic IBM i DSPLY prefix. + let _ = writeln!(out, "DSPLY {}", text); + let _ = out.flush(); +} + +// ───────────────────────────────────────────────────────────────────────────── +// rpg_dsply_cstr — display a null-terminated C string +// ───────────────────────────────────────────────────────────────────────────── + +/// Display a null-terminated C string with a `DSPLY` prefix. +/// +/// # Safety +/// +/// `ptr` must point to a valid null-terminated C string. +#[no_mangle] +pub unsafe extern "C" fn rpg_dsply_cstr(ptr: *const std::os::raw::c_char) { + let text = if ptr.is_null() { + std::borrow::Cow::Borrowed("") + } else { + unsafe { CStr::from_ptr(ptr).to_string_lossy() } + }; + + let stdout = io::stdout(); + let mut out = stdout.lock(); + let _ = writeln!(out, "DSPLY {}", text); + let _ = out.flush(); +} + +// ───────────────────────────────────────────────────────────────────────────── +// rpg_dsply_i64 — display a signed 64-bit integer +// ───────────────────────────────────────────────────────────────────────────── + +/// Display the decimal representation of a signed 64-bit integer. 
/// Write one `DSPLY`-prefixed line containing `value` to stdout and flush.
///
/// Shared by the numeric display entry points. Write and flush errors are
/// deliberately ignored.
fn write_dsply_line<T: std::fmt::Display>(value: T) {
    let mut console = io::stdout().lock();
    let _ = writeln!(console, "DSPLY {}", value);
    let _ = console.flush();
}

/// Display a signed 64-bit integer in decimal on stdout.
///
/// The value is written as one `DSPLY`-prefixed line and stdout is flushed.
#[no_mangle]
pub extern "C" fn rpg_dsply_i64(n: i64) {
    write_dsply_line(n);
}

/// Display a 64-bit float on stdout.
///
/// The value is rendered with Rust's default `Display` formatting for `f64`,
/// written as one `DSPLY`-prefixed line, and stdout is flushed.
#[no_mangle]
pub extern "C" fn rpg_dsply_f64(f: f64) {
    write_dsply_line(f);
}

/// Terminate the process with exit status `code`.
///
/// A one-line banner naming the code is printed to stderr first. This function
/// does not return to its caller.
#[no_mangle]
pub extern "C" fn rpg_halt(code: i32) {
    use std::process;

    eprintln!("RPG program halted with code {}", code);
    process::exit(code);
}
/// Fill the first `len` bytes at `ptr` with `fill_byte`.
///
/// Intended for clearing character fields (for example filling with the space
/// byte 0x20). A null `ptr` or a `len` of zero or less makes the call a no-op.
///
/// # Safety
///
/// When `ptr` is non-null and `len` is positive, `ptr` must be valid for
/// writes of `len` bytes.
#[no_mangle]
pub unsafe extern "C" fn rpg_memset_char(ptr: *mut u8, fill_byte: u8, len: i64) {
    if ptr.is_null() || len <= 0 {
        return;
    }
    // SAFETY: `ptr` is non-null and the caller guarantees it is writable for
    // `len` bytes.
    unsafe { std::ptr::write_bytes(ptr, fill_byte, len as usize) };
}

/// Copy a character field into a `dst_len`-byte destination, left-justified.
///
/// * If `src_len` < `dst_len`, the source bytes are copied and the rest of the
///   destination is filled with ASCII spaces.
/// * If `src_len` > `dst_len`, only the first `dst_len` source bytes are
///   copied (the excess on the right is dropped).
/// * A `src_len` of zero or less is treated as an empty source, so the whole
///   destination is blanked. Previously a negative `src_len` caused a panic
///   inside this `extern "C"` function.
///
/// The call is a no-op when `dst` or `src` is null or `dst_len` is not
/// positive. The source and destination ranges may overlap: the copy has
/// `memmove` semantics.
///
/// # Safety
///
/// `dst` must be valid for writes of `dst_len` bytes, and `src` must be valid
/// for reads of `min(src_len, dst_len)` bytes.
#[no_mangle]
pub unsafe extern "C" fn rpg_move_char(
    dst: *mut u8,
    dst_len: i64,
    src: *const u8,
    src_len: i64,
) {
    if dst.is_null() || src.is_null() || dst_len <= 0 {
        return;
    }

    // Clamp into 0..=dst_len *before* converting to usize so a negative
    // length can never turn into a huge unsigned count.
    let copy_len = src_len.clamp(0, dst_len) as usize;
    let pad_len = dst_len as usize - copy_len;

    // SAFETY: both pointers are non-null; the caller guarantees `src` is
    // readable for `copy_len` bytes and `dst` is writable for `dst_len`
    // bytes, and `copy_len + pad_len == dst_len`. `ptr::copy` tolerates
    // overlapping ranges.
    unsafe {
        std::ptr::copy(src, dst, copy_len);
        std::ptr::write_bytes(dst.add(copy_len), b' ', pad_len);
    }
}
/// Locate the space-trimmed contents of a character field.
///
/// Leading and trailing ASCII spaces are skipped. A pointer to the first
/// remaining byte is stored in `*out_ptr` and the remaining byte count in
/// `*out_len`. Nothing is copied: the result points into the input field.
///
/// A field consisting only of spaces yields the field's start pointer and a
/// length of 0. When `ptr` is null, `len` is not positive, or either output
/// pointer is null, each non-null output is set to `ptr` / `0` respectively
/// and the function returns.
///
/// # Safety
///
/// * A non-null `ptr` with positive `len` must be valid for reads of `len`
///   bytes.
/// * Non-null `out_ptr` and `out_len` must be valid for writes.
#[no_mangle]
pub unsafe extern "C" fn rpg_trim(
    ptr: *const u8,
    len: i64,
    out_ptr: *mut *const u8,
    out_len: *mut i64,
) {
    let nothing_to_trim = ptr.is_null() || len <= 0;
    if nothing_to_trim || out_ptr.is_null() || out_len.is_null() {
        // SAFETY: `as_mut` yields `None` for null; non-null outputs are
        // writable per the caller's contract.
        if let Some(slot) = unsafe { out_ptr.as_mut() } {
            *slot = ptr;
        }
        if let Some(slot) = unsafe { out_len.as_mut() } {
            *slot = 0;
        }
        return;
    }

    // SAFETY: `ptr` is non-null and readable for `len` bytes.
    let field = unsafe { slice::from_raw_parts(ptr, len as usize) };

    // Half-open range of the non-space content; (0, 0) when the field holds
    // nothing but spaces.
    let first_text = field.iter().position(|byte| *byte != b' ');
    let last_text = field.iter().rposition(|byte| *byte != b' ');
    let (begin, end) = match (first_text, last_text) {
        (Some(first), Some(last)) => (first, last + 1),
        _ => (0, 0),
    };
    let content = &field[begin..end];

    // SAFETY: both output pointers were checked to be non-null above.
    unsafe {
        *out_ptr = content.as_ptr();
        *out_len = content.len() as i64;
    }
}

/// Return the declared length of a character field.
///
/// The result is `len` itself, unchanged; the field contents are never read
/// and `_ptr` is not dereferenced, so trailing spaces do not affect the value.
#[no_mangle]
pub extern "C" fn rpg_len(_ptr: *const u8, len: i64) -> i64 {
    len
}
/// Find the first occurrence of one byte string inside another.
///
/// Searches `src_ptr[0..src_len]` for `search_ptr[0..search_len]`, beginning
/// at the 1-based byte position `start` (a `start` of zero or less means the
/// beginning). Returns the 1-based position of the first match, or 0 when
/// there is no match, when either pointer is null, when either length is not
/// positive, or when `start` lies beyond the end of the source.
///
/// # Safety
///
/// Non-null pointers with positive lengths must be valid for reads of their
/// respective lengths.
#[no_mangle]
pub unsafe extern "C" fn rpg_scan(
    search_ptr: *const u8,
    search_len: i64,
    src_ptr: *const u8,
    src_len: i64,
    start: i64, // 1-based; 0 means "from beginning" (treated as 1)
) -> i64 {
    let usable = !search_ptr.is_null() && !src_ptr.is_null() && search_len > 0 && src_len > 0;
    if !usable {
        return 0;
    }

    // SAFETY: both pointers are non-null and readable for their (positive)
    // lengths per the caller's contract.
    let pattern = unsafe { slice::from_raw_parts(search_ptr, search_len as usize) };
    let text = unsafe { slice::from_raw_parts(src_ptr, src_len as usize) };

    // Convert the 1-based start position into a 0-based offset.
    let offset = match start {
        s if s > 0 => (s - 1) as usize,
        _ => 0,
    };

    // `get` yields `None` when the offset is past the end of the text.
    let tail = match text.get(offset..) {
        Some(rest) => rest,
        None => return 0,
    };

    for (shift, window) in tail.windows(pattern.len()).enumerate() {
        if window == pattern {
            // Report the hit as a 1-based position within the whole text.
            return (offset + shift + 1) as i64;
        }
    }
    0
}
/// Copy a substring of a character field into a destination buffer.
///
/// Bytes are taken from `src_ptr[0..src_len]` starting at the 1-based position
/// `start` (values of 1 or less mean the first byte). At most `sub_len` bytes
/// are copied; a `sub_len` of zero or less means "through the end of the
/// source". The copy is further limited to `dst_len` bytes. Destination bytes
/// beyond the copied prefix are left untouched.
///
/// Returns the number of bytes written, or 0 when either pointer is null,
/// either length is not positive, or `start` lies beyond the source.
///
/// # Safety
///
/// Non-null pointers with positive lengths must be valid for their respective
/// lengths (`src_ptr` for reads, `dst_ptr` for writes), and the two ranges
/// must not overlap.
#[no_mangle]
pub unsafe extern "C" fn rpg_subst(
    src_ptr: *const u8,
    src_len: i64,
    start: i64,   // 1-based
    sub_len: i64, // 0 = "to end"
    dst_ptr: *mut u8,
    dst_len: i64,
) -> i64 {
    if src_ptr.is_null() || dst_ptr.is_null() || src_len <= 0 || dst_len <= 0 {
        return 0;
    }

    // SAFETY: both pointers are non-null and valid for their (positive)
    // lengths per the caller's contract.
    let source = unsafe { slice::from_raw_parts(src_ptr, src_len as usize) };
    let target = unsafe { slice::from_raw_parts_mut(dst_ptr, dst_len as usize) };

    // 1-based start position -> 0-based offset.
    let offset = if start > 1 { (start - 1) as usize } else { 0 };

    // Everything from the offset onward; bail out when the offset is at or
    // past the end of the source.
    let remaining = match source.get(offset..) {
        Some(rest) if !rest.is_empty() => rest,
        _ => return 0,
    };

    // Limit by what is available, what fits, and (if given) what was asked.
    let mut count = remaining.len().min(target.len());
    if sub_len > 0 {
        count = count.min(sub_len as usize);
    }

    target[..count].copy_from_slice(&remaining[..count]);
    count as i64
}

/// Return `bytes` without its trailing ASCII spaces (0x20).
///
/// Interior and leading spaces are kept; an all-space or empty input yields an
/// empty slice.
#[inline]
fn rtrim_spaces(bytes: &[u8]) -> &[u8] {
    let mut kept = bytes;
    // Peel one trailing space per iteration until the last byte is not a space.
    while let [head @ .., b' '] = kept {
        kept = head;
    }
    kept
}
fn rpg_scan_not_found() { + let hay = b"Hello"; + let needle = b"XYZ"; + let pos = unsafe { + rpg_scan( + needle.as_ptr(), needle.len() as i64, + hay.as_ptr(), hay.len() as i64, + 1, + ) + }; + assert_eq!(pos, 0); + } + + #[test] + fn rpg_subst_copies_correctly() { + let src = b"Hello, World!"; + let mut dst = vec![0u8; 5]; + let written = unsafe { + rpg_subst( + src.as_ptr(), src.len() as i64, + 8, // start at 'W' (1-based) + 5, + dst.as_mut_ptr(), dst.len() as i64, + ) + }; + assert_eq!(written, 5); + assert_eq!(&dst, b"World"); + } + + #[test] + fn rpg_move_char_pads_with_spaces() { + let src = b"Hi"; + let mut dst = vec![0u8; 5]; + unsafe { + rpg_move_char( + dst.as_mut_ptr(), dst.len() as i64, + src.as_ptr(), src.len() as i64, + ); + } + assert_eq!(&dst, b"Hi "); + } + + #[test] + fn rpg_move_char_truncates() { + let src = b"Hello, World!"; + let mut dst = vec![0u8; 5]; + unsafe { + rpg_move_char( + dst.as_mut_ptr(), dst.len() as i64, + src.as_ptr(), src.len() as i64, + ); + } + assert_eq!(&dst, b"Hello"); + } + + #[test] + fn rpg_trim_removes_leading_and_trailing() { + let input = b" hello "; + let mut out_ptr: *const u8 = std::ptr::null(); + let mut out_len: i64 = 0; + unsafe { + rpg_trim( + input.as_ptr(), input.len() as i64, + &mut out_ptr, &mut out_len, + ); + let result = std::slice::from_raw_parts(out_ptr, out_len as usize); + assert_eq!(result, b"hello"); + } + } + + #[test] + fn rpg_dsply_smoke() { + // Just ensure it doesn't panic. + let msg = b"Hello, World! "; + unsafe { rpg_dsply(msg.as_ptr(), msg.len() as i64) }; + } + + #[test] + fn rpg_dsply_i64_smoke() { + rpg_dsply_i64(42); + rpg_dsply_i64(-1); + rpg_dsply_i64(i64::MAX); + } + + #[test] + fn rpg_dsply_f64_smoke() { + rpg_dsply_f64(3.14159); + rpg_dsply_f64(-0.0); + } +} diff --git a/src/ast.rs b/src/ast.rs new file mode 100644 index 0000000..5d64103 --- /dev/null +++ b/src/ast.rs @@ -0,0 +1,651 @@ +//! ast.rs — Typed Abstract Syntax Tree for RPG IV free-format programs. +//! +//! 
This module defines the in-memory representation produced by the lowering +//! pass (`lower.rs`) and consumed by the LLVM code-generator (`codegen.rs`). +//! +//! Only the subset of the language that is needed to compile `hello.rpg` (and +//! small programs like it) is fully fleshed out. Everything else is kept as +//! placeholder variants so the lowering pass can represent the whole parse tree +//! without panicking, and the codegen can skip unimplemented nodes gracefully. + +// ───────────────────────────────────────────────────────────────────────────── +// Top-level program +// ───────────────────────────────────────────────────────────────────────────── + +/// A complete RPG IV source file. +#[derive(Debug, Clone)] +pub struct Program { + /// Zero or more top-level declarations (CTL-OPT, DCL-S, DCL-C, DCL-DS, + /// file declarations, subroutines …). + pub declarations: Vec, + /// Zero or more procedure definitions (`DCL-PROC … END-PROC`). + pub procedures: Vec, +} + +// ───────────────────────────────────────────────────────────────────────────── +// Declarations +// ───────────────────────────────────────────────────────────────────────────── + +#[derive(Debug, Clone)] +pub enum Declaration { + /// `CTL-OPT keyword-list;` + ControlSpec(ControlSpec), + /// `DCL-S name type [keywords];` + Standalone(StandaloneDecl), + /// `DCL-C name literal;` or `DCL-C name CONST(literal);` + Constant(ConstantDecl), + /// `DCL-C name *named-constant;` + NamedConstantDecl(NamedConstantDecl), + /// `DCL-DS name … END-DS;` + DataStructure(DataStructureDecl), + /// `DCL-F name …;` + File(FileDecl), + /// `BEG-SR name; … END-SR;` + Subroutine(Subroutine), +} + +// ── Control spec ────────────────────────────────────────────────────────────── + +#[derive(Debug, Clone)] +pub struct ControlSpec { + pub keywords: Vec, +} + +#[derive(Debug, Clone)] +pub enum CtlKeyword { + DftActGrp(bool), // *YES / *NO + NoMain, + Main(String), + Other(String), // catch-all for keywords we don't 
generate code for +} + +// ── Standalone variable ─────────────────────────────────────────────────────── + +#[derive(Debug, Clone)] +pub struct StandaloneDecl { + pub name: String, + pub ty: TypeSpec, + pub keywords: Vec, +} + +// ── Constant declaration ────────────────────────────────────────────────────── + +#[derive(Debug, Clone)] +pub struct ConstantDecl { + pub name: String, + pub value: Literal, +} + +#[derive(Debug, Clone)] +pub struct NamedConstantDecl { + pub name: String, + pub value: NamedConstant, +} + +// ── Data structure ──────────────────────────────────────────────────────────── + +#[derive(Debug, Clone)] +pub struct DataStructureDecl { + pub name: String, + pub keywords: Vec, + pub fields: Vec, +} + +#[derive(Debug, Clone)] +pub enum DsKeyword { + Qualified, + Template, + Other(String), +} + +#[derive(Debug, Clone)] +pub struct DsField { + pub name: String, + pub ty: TypeSpec, + pub keywords: Vec, +} + +// ── File declaration ────────────────────────────────────────────────────────── + +#[derive(Debug, Clone)] +pub struct FileDecl { + pub name: String, + pub keywords: Vec, // simplified — not code-gen'd +} + +// ── Subroutine ──────────────────────────────────────────────────────────────── + +#[derive(Debug, Clone)] +pub struct Subroutine { + pub name: String, + pub body: Vec, +} + +// ───────────────────────────────────────────────────────────────────────────── +// Type specifications +// ───────────────────────────────────────────────────────────────────────────── + +#[derive(Debug, Clone, PartialEq)] +pub enum TypeSpec { + /// `CHAR(n)` — fixed-length character field. + Char(Box), + /// `VARCHAR(n)` — variable-length character. + VarChar(Box), + /// `INT(n)` — signed integer (n = 3, 5, 10, or 20). + Int(Box), + /// `UNS(n)` — unsigned integer. + Uns(Box), + /// `FLOAT(n)` — floating-point. 
+ Float(Box), + /// `PACKED(digits:decimals)` + Packed(Box, Box), + /// `ZONED(digits:decimals)` + Zoned(Box, Box), + /// `BINDEC(digits:decimals)` + Bindec(Box, Box), + /// `IND` — indicator (boolean). + Ind, + /// `DATE [(*fmt)]` + Date, + /// `TIME [(*fmt)]` + Time, + /// `TIMESTAMP` + Timestamp, + /// `POINTER` + Pointer, + /// `LIKE(name)` + Like(String), + /// `LIKEDS(name)` + LikeDs(String), + /// Unrecognised / not yet implemented type. + Unknown(String), +} + +impl TypeSpec { + /// Return the number of bytes this type occupies at runtime on a 64-bit + /// Linux host. Returns `None` for types whose size is not statically known. + pub fn byte_size(&self) -> Option { + match self { + TypeSpec::Char(expr) | TypeSpec::VarChar(expr) => { + if let Expression::Literal(Literal::Integer(n)) = expr.as_ref() { + Some(*n as u64) + } else { + None + } + } + TypeSpec::Int(expr) | TypeSpec::Uns(expr) => { + if let Expression::Literal(Literal::Integer(n)) = expr.as_ref() { + Some(match n { + 3 => 1, + 5 => 2, + 10 => 4, + 20 => 8, + _ => 8, // default to 8 bytes + }) + } else { + None + } + } + TypeSpec::Float(expr) => { + if let Expression::Literal(Literal::Integer(n)) = expr.as_ref() { + Some(if *n <= 4 { 4 } else { 8 }) + } else { + None + } + } + TypeSpec::Ind => Some(1), + TypeSpec::Pointer => Some(8), + TypeSpec::Packed(digits, _) => { + if let Expression::Literal(Literal::Integer(n)) = digits.as_ref() { + Some((*n as u64 / 2) + 1) + } else { + None + } + } + _ => None, + } + } +} + +// ───────────────────────────────────────────────────────────────────────────── +// Variable / declaration keywords +// ───────────────────────────────────────────────────────────────────────────── + +#[derive(Debug, Clone)] +pub enum VarKeyword { + /// `INZ` — default initialisation. + Inz, + /// `INZ(expr)` — explicit initialisation value. + InzExpr(Expression), + /// `INZ(*named-constant)` — initialise to named constant. 
+ InzNamed(NamedConstant), + Static, + Other(String), +} + +// ───────────────────────────────────────────────────────────────────────────── +// Procedures +// ───────────────────────────────────────────────────────────────────────────── + +#[derive(Debug, Clone)] +pub struct Procedure { + pub name: String, + pub exported: bool, + pub pi: Option, + /// Local declarations (DCL-S, DCL-C, etc.) inside the procedure. + pub locals: Vec, + pub body: Vec, +} + +/// Procedure Interface specification (`DCL-PI … END-PI`). +#[derive(Debug, Clone)] +pub struct PiSpec { + pub name: String, + pub return_ty: Option, + pub params: Vec, +} + +#[derive(Debug, Clone)] +pub struct PiParam { + pub name: String, + pub ty: TypeSpec, + pub keywords: Vec, +} + +#[derive(Debug, Clone)] +pub enum ParamKeyword { + Value, + Const, + Other(String), +} + +// ───────────────────────────────────────────────────────────────────────────── +// Statements +// ───────────────────────────────────────────────────────────────────────────── + +#[derive(Debug, Clone)] +pub enum Statement { + /// `lvalue = expr;` or `EVAL lvalue = expr;` + Assign(AssignStmt), + /// `IF expr; … [ELSEIF …] [ELSE …] ENDIF;` + If(IfStmt), + /// `DOW expr; … ENDDO;` + DoWhile(DoWhileStmt), + /// `DOU expr; … ENDDO;` + DoUntil(DoUntilStmt), + /// `FOR i = start TO/DOWNTO end [BY step]; … ENDFOR;` + For(ForStmt), + /// `SELECT; WHEN … [OTHER …] ENDSL;` + Select(SelectStmt), + /// `MONITOR; … ON-ERROR … ENDMON;` + Monitor(MonitorStmt), + /// `CALLP name(args);` or bare procedure call `name(args);` + CallP(CallPStmt), + /// `RETURN [expr];` + Return(ReturnStmt), + /// `LEAVE;` + Leave, + /// `ITER;` + Iter, + /// `LEAVESR;` + LeaveSr, + /// `EXSR name;` + ExSr(String), + /// `DSPLY expr;` + Dsply(DsplyStmt), + /// `RESET lvalue;` / `RESET *ALL;` + Reset(ResetStmt), + /// `CLEAR lvalue;` + Clear(LValue), + /// Any I/O statement (READ, WRITE, CHAIN, etc.) — kept as opaque for now. 
+ Io(IoStatement), + /// Catch-all for statements not yet lowered. + Unimplemented(String), +} + +// ── Assignment ──────────────────────────────────────────────────────────────── + +#[derive(Debug, Clone)] +pub struct AssignStmt { + pub target: LValue, + pub value: Expression, +} + +// ── If / ElseIf / Else ──────────────────────────────────────────────────────── + +#[derive(Debug, Clone)] +pub struct IfStmt { + pub condition: Expression, + pub then_body: Vec, + pub elseifs: Vec, + pub else_body: Option>, +} + +#[derive(Debug, Clone)] +pub struct ElseIf { + pub condition: Expression, + pub body: Vec, +} + +// ── DOW loop ────────────────────────────────────────────────────────────────── + +#[derive(Debug, Clone)] +pub struct DoWhileStmt { + pub condition: Expression, + pub body: Vec, +} + +// ── DOU loop ────────────────────────────────────────────────────────────────── + +#[derive(Debug, Clone)] +pub struct DoUntilStmt { + pub condition: Expression, + pub body: Vec, +} + +// ── FOR loop ────────────────────────────────────────────────────────────────── + +#[derive(Debug, Clone)] +pub struct ForStmt { + pub var: String, + pub start: Expression, + pub limit: Expression, + pub step: Option, + pub downto: bool, + pub body: Vec, +} + +// ── SELECT / WHEN ───────────────────────────────────────────────────────────── + +#[derive(Debug, Clone)] +pub struct SelectStmt { + pub whens: Vec, + pub other: Option>, +} + +#[derive(Debug, Clone)] +pub struct WhenClause { + pub condition: Expression, + pub body: Vec, +} + +// ── MONITOR ─────────────────────────────────────────────────────────────────── + +#[derive(Debug, Clone)] +pub struct MonitorStmt { + pub body: Vec, + pub handlers: Vec, +} + +#[derive(Debug, Clone)] +pub struct OnError { + pub codes: Vec, + pub body: Vec, +} + +#[derive(Debug, Clone)] +pub enum ErrorCode { + Integer(u32), + Program, + File, + All, +} + +// ── CALLP ───────────────────────────────────────────────────────────────────── + +#[derive(Debug, 
Clone)] +pub struct CallPStmt { + pub name: String, + pub args: Vec, +} + +// ── RETURN ──────────────────────────────────────────────────────────────────── + +#[derive(Debug, Clone)] +pub struct ReturnStmt { + pub value: Option, +} + +// ── DSPLY ───────────────────────────────────────────────────────────────────── + +#[derive(Debug, Clone)] +pub struct DsplyStmt { + /// The expression to display. + pub expr: Expression, + /// Optional message queue identifier (two-operand form). + pub msg_q: Option, + pub response: Option, +} + +// ── RESET ───────────────────────────────────────────────────────────────────── + +#[derive(Debug, Clone)] +pub enum ResetStmt { + Target(LValue), + All, +} + +// ── I/O (opaque) ────────────────────────────────────────────────────────────── + +#[derive(Debug, Clone)] +pub enum IoStatement { + Read { file: String }, + ReadP { file: String }, + Write { record: String }, + Update { record: String }, + Delete { key: Expression, file: String }, + Chain { key: Expression, file: String }, + SetLL { key: SetKey, file: String }, + SetGT { key: SetKey, file: String }, + Open { file: String }, + Close { file: Option }, // None = *ALL + Except { format: Option }, + ExFmt { format: String }, + Post { file: String }, + Feod { file: String }, + Unlock { file: String }, +} + +#[derive(Debug, Clone)] +pub enum SetKey { + Expr(Expression), + Start, + End, +} + +// ───────────────────────────────────────────────────────────────────────────── +// L-values +// ───────────────────────────────────────────────────────────────────────────── + +/// An assignable location. +#[derive(Debug, Clone, PartialEq)] +pub enum LValue { + /// Simple or dotted name: `myVar` or `ds.field`. + Name(QualifiedName), + /// Array element: `arr(i)`. + Index(QualifiedName, Vec), +} + +impl LValue { + /// Return the base name (first component of the qualified name). 
+ pub fn base_name(&self) -> &str { + match self { + LValue::Name(q) | LValue::Index(q, _) => &q.parts[0], + } + } +} + +// ───────────────────────────────────────────────────────────────────────────── +// Expressions +// ───────────────────────────────────────────────────────────────────────────── + +#[derive(Debug, Clone, PartialEq)] +pub enum Expression { + Literal(Literal), + Named(NamedConstant), + Special(SpecialValue), + Variable(QualifiedName), + /// Array / function-style subscript: `name(idx)`. + Index(QualifiedName, Vec), + /// Procedure / built-in call as expression: `name(args)`. + Call(String, Vec), + BuiltIn(BuiltIn), + UnaryMinus(Box), + UnaryPlus(Box), + BinOp(BinOp, Box, Box), + Not(Box), + Paren(Box), +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum BinOp { + Add, Sub, Mul, Div, Pow, + Eq, Ne, Lt, Le, Gt, Ge, + And, Or, +} + +// ───────────────────────────────────────────────────────────────────────────── +// Literals +// ───────────────────────────────────────────────────────────────────────────── + +#[derive(Debug, Clone, PartialEq)] +pub enum Literal { + String(String), + Integer(i64), + Float(f64), + Hex(Vec), + /// `*ON` / `*OFF` as a literal. + Indicator(bool), +} + +// ───────────────────────────────────────────────────────────────────────────── +// Named constants (`*ON`, `*OFF`, `*BLANK`, …) +// ───────────────────────────────────────────────────────────────────────────── + +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum NamedConstant { + On, + Off, + Blank, + Blanks, + Zero, + Zeros, + HiVal, + LoVal, + Null, +} + +// ───────────────────────────────────────────────────────────────────────────── +// Special values (`*IN`, `*START`, …) +// ───────────────────────────────────────────────────────────────────────────── + +#[derive(Debug, Clone, PartialEq)] +pub enum SpecialValue { + /// `*IN(n)` — indicator by number. 
+ In(Box), + InAll, + On, + Off, + Blank, + Blanks, + Zero, + Zeros, + HiVal, + LoVal, + Null, + /// `*ALL'string'` + All(String), + Omit, + This, + Same, + Start, + End, +} + +// ───────────────────────────────────────────────────────────────────────────── +// Built-in functions +// ───────────────────────────────────────────────────────────────────────────── + +/// The RPG IV `%BUILTIN(…)` functions we actually lower to code. +/// All others are wrapped in `Other`. +#[derive(Debug, Clone, PartialEq)] +pub enum BuiltIn { + /// `%LEN(identifier)` — byte length of a field. + Len(Box), + /// `%TRIM(expr)` — trim leading and trailing blanks. + Trim(Box), + /// `%TRIML(expr)` — trim leading blanks. + TrimL(Box), + /// `%TRIMR(expr)` — trim trailing blanks. + TrimR(Box), + /// `%CHAR(expr)` — convert to character string. + Char(Box), + /// `%INT(expr)` — convert to integer. + Int(Box), + /// `%DEC(expr:digits:decimals)` — convert to packed decimal. + Dec(Box, Box, Box), + /// `%ABS(expr)` — absolute value. + Abs(Box), + /// `%SQRT(expr)` — square root. + Sqrt(Box), + /// `%EOF[(file)]` + Eof(Option), + /// `%FOUND[(file)]` + Found(Option), + /// `%ERROR()` + Error, + /// `%SUBST(str:start:len)` or `%SUBST(str:start)`. + Subst(Box, Box, Option>), + /// `%SCAN(pattern:source[:start])`. + Scan(Box, Box, Option>), + /// `%SIZE(identifier)`. + Size(Box), + /// `%ADDR(identifier)`. + Addr(Box), + /// `%ALLOC(size)`. + Alloc(Box), + /// `%REM(a:b)`. + Rem(Box, Box), + /// `%DIV(a:b)`. + Div(Box, Box), + /// Any built-in we haven't individually modelled. + Other(String, Vec), +} + +// ───────────────────────────────────────────────────────────────────────────── +// Qualified names and argument lists +// ───────────────────────────────────────────────────────────────────────────── + +/// A dot-separated name: `ds.subDs.leaf`. 
+#[derive(Debug, Clone, PartialEq)] +pub struct QualifiedName { + pub parts: Vec, +} + +impl QualifiedName { + pub fn simple(name: impl Into) -> Self { + QualifiedName { parts: vec![name.into()] } + } + + pub fn is_simple(&self) -> bool { + self.parts.len() == 1 + } + + /// Return the leaf (last) component. + pub fn leaf(&self) -> &str { + self.parts.last().map(|s| s.as_str()).unwrap_or("") + } +} + +impl std::fmt::Display for QualifiedName { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.parts.join(".")) + } +} + +/// A call argument. +#[derive(Debug, Clone, PartialEq)] +pub enum Arg { + Expr(Expression), + Omit, +} diff --git a/src/codegen.rs b/src/codegen.rs new file mode 100644 index 0000000..3be277e --- /dev/null +++ b/src/codegen.rs @@ -0,0 +1,1589 @@ +//! codegen.rs — LLVM IR code generation via inkwell. +//! +//! Takes a typed [`Program`] from the lowering pass and emits an LLVM module +//! that can be compiled to a native object file and linked into a standalone +//! binary. +//! +//! ## Architecture +//! +//! * Each RPG IV `DCL-PROC … END-PROC` becomes an LLVM function. +//! * An exported procedure named `main` (or the first exported procedure) is +//! wrapped in a C `main()` entry point so the binary is directly executable. +//! * `DCL-S` standalone variables are allocated as stack slots (`alloca`) inside +//! the function that owns them, or as LLVM global variables for module-scope +//! declarations. +//! * `DSPLY expr;` calls the runtime helper `rpg_dsply(ptr, len)` which is +//! defined in `src/runtime.rs` and compiled into `librpgrt.so`. +//! * String literals are stored as null-terminated byte arrays in `.rodata`. +//! +//! ## Implemented codegen nodes +//! +//! * `CTL-OPT` — recorded but not code-generated. +//! * `DCL-S name CHAR(n) INZ('…');` — alloca + memcpy from rodata string. +//! * `DCL-S name INT(10) INZ(n);` — alloca + store integer. +//! * `DSPLY expr;` — calls `rpg_dsply(i8* ptr, i64 len)`. 
+//! * `RETURN;` — `ret void` or `ret i64 0` in the C main wrapper. +//! * Assignment `name = expr;` +//! * `IF/ELSEIF/ELSE/ENDIF` — conditional branches. +//! * `DOW/ENDDO`, `DOU/ENDDO` — while / do-while loops. +//! * `FOR … TO … BY … ENDFOR` — counted loops. +//! * `CALLP name(args);` / bare call — direct LLVM calls. +//! * Integer arithmetic / comparison expressions. +//! * `LEAVE` / `ITER` (break / continue) inside loops. + +use std::collections::HashMap; +use std::path::Path; + +use inkwell::builder::Builder; +use inkwell::context::Context; +use inkwell::module::Module; +use inkwell::passes::PassBuilderOptions; +use inkwell::targets::{ + CodeModel, FileType, InitializationConfig, RelocMode, Target, + TargetMachine, +}; +use inkwell::types::{BasicMetadataTypeEnum, BasicType, BasicTypeEnum, FunctionType}; +use inkwell::values::{ + BasicMetadataValueEnum, BasicValueEnum, FunctionValue, + PointerValue, +}; +use inkwell::{AddressSpace, IntPredicate, OptimizationLevel}; + +use crate::ast::*; + +// ───────────────────────────────────────────────────────────────────────────── +// Public API +// ───────────────────────────────────────────────────────────────────────────── + +/// Compile `program` to a native object file at `output_path`. +/// +/// `opt_level` controls LLVM optimisation (0 = none, 1 = less, 2 = default, +/// 3 = aggressive). +pub fn compile_to_object( + program: &Program, + output_path: &Path, + opt_level: u8, +) -> Result<(), CodegenError> { + // Initialise native target (the machine we are running on). 
+ Target::initialize_native(&InitializationConfig::default()) + .map_err(|e| CodegenError::new(format!("LLVM init: {}", e)))?; + + let context = Context::create(); + let module = context.create_module("rpg_program"); + let builder = context.create_builder(); + + let mut cg = Codegen { + context: &context, + module, + builder, + globals: HashMap::new(), + string_cache: HashMap::new(), + global_inits: Vec::new(), + }; + + cg.gen_program(program)?; + + // Run optimisations. + let opt = match opt_level { + 0 => OptimizationLevel::None, + 1 => OptimizationLevel::Less, + 2 => OptimizationLevel::Default, + _ => OptimizationLevel::Aggressive, + }; + + let triple = TargetMachine::get_default_triple(); + let cpu = TargetMachine::get_host_cpu_name(); + let features = TargetMachine::get_host_cpu_features(); + let target = Target::from_triple(&triple) + .map_err(|e| CodegenError::new(format!("target: {}", e)))?; + let machine = target + .create_target_machine( + &triple, + cpu.to_str().unwrap_or(""), + features.to_str().unwrap_or(""), + opt, + RelocMode::PIC, + CodeModel::Default, + ) + .ok_or_else(|| CodegenError::new("could not create target machine"))?; + + // Apply pass pipeline. + if opt_level > 0 { + let opts = PassBuilderOptions::create(); + let pipeline = match opt_level { + 1 => "default", + 2 => "default", + _ => "default", + }; + cg.module + .run_passes(pipeline, &machine, opts) + .map_err(|e| CodegenError::new(format!("passes: {}", e)))?; + } + + // Emit object file. + machine + .write_to_file(&cg.module, FileType::Object, output_path) + .map_err(|e| CodegenError::new(format!("emit object: {}", e)))?; + + Ok(()) +} + +/// Compile `program` to LLVM IR text (for debugging / inspection). 
+pub fn emit_ir(program: &Program) -> Result { + let context = Context::create(); + let module = context.create_module("rpg_program"); + let builder = context.create_builder(); + + let mut cg = Codegen { + context: &context, + module, + builder, + globals: HashMap::new(), + string_cache: HashMap::new(), + global_inits: Vec::new(), + }; + + cg.gen_program(program)?; + Ok(cg.module.print_to_string().to_string()) +} + +// ───────────────────────────────────────────────────────────────────────────── +// Error type +// ───────────────────────────────────────────────────────────────────────────── + +#[derive(Debug)] +pub struct CodegenError { + pub message: String, +} + +impl CodegenError { + fn new(msg: impl Into) -> Self { + CodegenError { message: msg.into() } + } +} + +impl std::fmt::Display for CodegenError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "codegen error: {}", self.message) + } +} + +impl std::error::Error for CodegenError {} + +// ───────────────────────────────────────────────────────────────────────────── +// Code-generator state +// ───────────────────────────────────────────────────────────────────────────── + +struct Codegen<'ctx> { + context: &'ctx Context, + module: Module<'ctx>, + builder: Builder<'ctx>, + /// Module-scope global variables name -> (alloca/global ptr, TypeSpec) + globals: HashMap, TypeSpec)>, + /// Interned string literal globals (content -> global ptr). + string_cache: HashMap>, + /// Global declarations that need runtime initialisation (INZ with a value). + /// Stored as (name, type, keywords) so `gen_global_init_fn` can emit them. + global_inits: Vec<(String, TypeSpec, Vec)>, +} + +/// Per-function code-generation state. +struct FnState<'ctx> { + /// Stack-allocated locals inside this function. + locals: HashMap, TypeSpec)>, + /// The LLVM function being built. + function: FunctionValue<'ctx>, + /// Block to branch to on `LEAVE` (break). 
+ break_block: Option>, + /// Block to branch to on `ITER` (continue). + continue_block: Option>, +} + +impl<'ctx> FnState<'ctx> { + fn new(function: FunctionValue<'ctx>) -> Self { + FnState { + locals: HashMap::new(), + function, + break_block: None, + continue_block: None, + } + } +} + +// ───────────────────────────────────────────────────────────────────────────── +// Top-level generation +// ───────────────────────────────────────────────────────────────────────────── + +impl<'ctx> Codegen<'ctx> { + fn gen_program(&mut self, program: &Program) -> Result<(), CodegenError> { + // Declare runtime functions. + self.declare_runtime_fns(); + + // Generate module-level globals from top-level declarations. + for decl in &program.declarations { + self.gen_global_decl(decl)?; + } + + // Emit the global-init constructor *before* procedures so that the + // function is available when we build the @llvm.global_ctors entry. + self.gen_global_init_fn()?; + + // Generate each procedure. + let mut exported_name: Option = None; + for proc in &program.procedures { + if proc.exported && exported_name.is_none() { + exported_name = Some(proc.name.clone()); + } + self.gen_procedure(proc)?; + } + + // Emit a C `main()` wrapper that calls the exported entry point. 
+ if let Some(name) = exported_name { + self.gen_main_wrapper(&name)?; + } + + Ok(()) + } + + // ── Runtime declarations ──────────────────────────────────────────────── + + fn declare_runtime_fns(&mut self) { + let i8_ptr = self.context.ptr_type(AddressSpace::default()); + let i64_t = self.context.i64_type(); + let void_t = self.context.void_type(); + + // void rpg_dsply(const char *ptr, int64_t len) + let dsply_ty = void_t.fn_type( + &[i8_ptr.into(), i64_t.into()], + false, + ); + self.module.add_function("rpg_dsply", dsply_ty, None); + + // void rpg_dsply_cstr(const char *ptr) — convenience for cstrings + let dsply_cstr_ty = void_t.fn_type(&[i8_ptr.into()], false); + self.module.add_function("rpg_dsply_cstr", dsply_cstr_ty, None); + + // Declare memcpy / memset from libc (used for CHAR initialisation). + // i8* memcpy(i8* dst, i8* src, i64 n) + let memcpy_ty = i8_ptr.fn_type( + &[i8_ptr.into(), i8_ptr.into(), i64_t.into()], + false, + ); + self.module.add_function("memcpy", memcpy_ty, None); + + // i8* memset(i8* dst, i32 c, i64 n) + let i32_t = self.context.i32_type(); + let memset_ty = i8_ptr.fn_type( + &[i8_ptr.into(), i32_t.into(), i64_t.into()], + false, + ); + self.module.add_function("memset", memset_ty, None); + } + + // ── Global declarations ───────────────────────────────────────────────── + + fn gen_global_decl(&mut self, decl: &Declaration) -> Result<(), CodegenError> { + match decl { + Declaration::Standalone(sd) => { + let size = sd.ty.byte_size().unwrap_or(8); + let arr_ty = self.context.i8_type().array_type(size as u32); + let global = self.module.add_global(arr_ty, Some(AddressSpace::default()), &sd.name); + global.set_initializer(&arr_ty.const_zero()); + let ptr = global.as_pointer_value(); + self.globals.insert(sd.name.clone(), (ptr, sd.ty.clone())); + + // Record any INZ keyword so the constructor can apply it. 
+ let needs_init = sd.keywords.iter().any(|k| matches!( + k, + VarKeyword::Inz | VarKeyword::InzExpr(_) | VarKeyword::InzNamed(_) + )); + if needs_init { + self.global_inits.push(( + sd.name.clone(), + sd.ty.clone(), + sd.keywords.clone(), + )); + } + } + Declaration::Constant(_) | Declaration::NamedConstantDecl(_) => { + // Constants don't require storage — they're inlined at use sites. + } + Declaration::ControlSpec(_) | Declaration::DataStructure(_) | + Declaration::File(_) | Declaration::Subroutine(_) => { + // Not code-generated at module level in this implementation. + } + } + Ok(()) + } + + // ── Procedure ────────────────────────────────────────────────────────── + + fn gen_procedure(&mut self, proc: &Procedure) -> Result, CodegenError> { + // Build LLVM function type from the PI spec (if any). + let fn_ty = self.build_proc_fn_type(proc); + + let fn_name = if proc.exported { + format!("rpg_{}", proc.name) + } else { + proc.name.clone() + }; + + let function = self.module.add_function(&fn_name, fn_ty, None); + let entry_bb = self.context.append_basic_block(function, "entry"); + self.builder.position_at_end(entry_bb); + + let mut state = FnState::new(function); + + // Allocate locals for DCL-S inside the proc. + for decl in &proc.locals { + self.gen_local_decl(decl, &mut state)?; + } + + // Generate each statement. + for stmt in &proc.body { + self.gen_statement(stmt, &mut state)?; + // Stop generating after a terminator to avoid unreachable code. + if self.current_block_is_terminated() { + break; + } + } + + // Ensure the function is properly terminated. + if !self.current_block_is_terminated() { + match fn_ty.get_return_type() { + None => { self.builder.build_return(None).ok(); } + Some(_ty) => { + // Return 0 if the source didn't have an explicit RETURN. 
+ let zero = self.context.i64_type().const_zero(); + self.builder.build_return(Some(&zero)).ok(); + } + } + } + + Ok(function) + } + + fn build_proc_fn_type(&self, proc: &Procedure) -> FunctionType<'ctx> { + let void_t = self.context.void_type(); + if let Some(pi) = &proc.pi { + let ret = pi.return_ty.as_ref() + .and_then(|ty| self.type_spec_to_llvm(ty)); + let params: Vec = pi.params.iter() + .filter_map(|p| self.type_spec_to_llvm(&p.ty)) + .map(|t| t.into()) + .collect(); + match ret { + Some(ret_ty) => ret_ty.fn_type(¶ms, false), + None => void_t.fn_type(¶ms, false), + } + } else { + void_t.fn_type(&[], false) + } + } + + fn gen_local_decl(&mut self, decl: &Declaration, state: &mut FnState<'ctx>) -> Result<(), CodegenError> { + match decl { + Declaration::Standalone(sd) => { + let ptr = self.alloca_for_type(&sd.ty, &sd.name); + // Apply initialiser if any. + for kw in &sd.keywords { + match kw { + VarKeyword::InzExpr(expr) => { + self.init_var_from_expr(ptr, &sd.ty, expr, state)?; + } + VarKeyword::InzNamed(nc) => { + self.init_var_from_named(ptr, &sd.ty, nc)?; + } + VarKeyword::Inz => { + // Zero-initialise. 
+ self.zero_init_var(ptr, &sd.ty)?; + } + _ => {} + } + } + state.locals.insert(sd.name.clone(), (ptr, sd.ty.clone())); + } + Declaration::Constant(_) | Declaration::NamedConstantDecl(_) => {} + _ => {} + } + Ok(()) + } + + fn alloca_for_type(&self, ty: &TypeSpec, name: &str) -> PointerValue<'ctx> { + let size = ty.byte_size().unwrap_or(8) as u32; + let arr_ty = self.context.i8_type().array_type(size); + self.builder.build_alloca(arr_ty, name).unwrap() + } + + fn zero_init_var(&self, ptr: PointerValue<'ctx>, ty: &TypeSpec) -> Result<(), CodegenError> { + let size = ty.byte_size().unwrap_or(0); + if size == 0 { return Ok(()); } + let memset = self.module.get_function("memset").unwrap(); + let zero = self.context.i32_type().const_zero(); + let len = self.context.i64_type().const_int(size, false); + self.builder.build_call(memset, &[ptr.into(), zero.into(), len.into()], "memset").ok(); + Ok(()) + } + + fn init_var_from_expr( + &mut self, + ptr: PointerValue<'ctx>, + ty: &TypeSpec, + expr: &Expression, + state: &mut FnState<'ctx>, + ) -> Result<(), CodegenError> { + match ty { + TypeSpec::Char(size_expr) => { + // Copy a string literal into the char buffer (space-padded). + if let Expression::Literal(Literal::String(s)) = expr { + let field_len = const_int_from_expr(size_expr).unwrap_or(s.len() as u64) as usize; + // Build a space-padded string of exactly `field_len` bytes. 
+ let mut padded = vec![b' '; field_len]; + let copy_len = s.len().min(field_len); + padded[..copy_len].copy_from_slice(&s.as_bytes()[..copy_len]); + let str_ptr = self.intern_bytes(&padded); + let memcpy = self.module.get_function("memcpy").unwrap(); + let len_val = self.context.i64_type().const_int(field_len as u64, false); + self.builder.build_call(memcpy, &[ptr.into(), str_ptr.into(), len_val.into()], "init_char").ok(); + } else { + let val = self.gen_expression(expr, state)?; + self.store_value(ptr, val, ty); + } + } + TypeSpec::Int(_) | TypeSpec::Uns(_) => { + let val = self.gen_expression(expr, state)?; + self.store_value(ptr, val, ty); + } + _ => { + let val = self.gen_expression(expr, state).ok(); + if let Some(v) = val { + self.store_value(ptr, v, ty); + } + } + } + Ok(()) + } + + fn init_var_from_named( + &mut self, + ptr: PointerValue<'ctx>, + ty: &TypeSpec, + nc: &NamedConstant, + ) -> Result<(), CodegenError> { + match (ty, nc) { + (TypeSpec::Char(_), NamedConstant::Blanks) | + (TypeSpec::Char(_), NamedConstant::Blank) => { + self.zero_init_var(ptr, ty)?; + } + (TypeSpec::Int(_) | TypeSpec::Uns(_), NamedConstant::Zero) | + (TypeSpec::Int(_) | TypeSpec::Uns(_), NamedConstant::Zeros) => { + self.zero_init_var(ptr, ty)?; + } + _ => { + self.zero_init_var(ptr, ty)?; + } + } + Ok(()) + } + + fn store_value(&self, ptr: PointerValue<'ctx>, val: BasicValueEnum<'ctx>, ty: &TypeSpec) { + let size = ty.byte_size().unwrap_or(8) as u32; + match val { + BasicValueEnum::IntValue(iv) => { + // Cast to the right width. 
+ let target_ty = self.context.custom_width_int_type(size * 8); + let cast = self.builder + .build_int_truncate_or_bit_cast(iv, target_ty, "cast") + .unwrap_or(iv); + let dst = self.builder.build_pointer_cast( + ptr, + self.context.ptr_type(AddressSpace::default()), + "ptr_cast", + ).unwrap_or(ptr); + self.builder.build_store(dst, cast).ok(); + } + BasicValueEnum::FloatValue(fv) => { + let dst = self.builder.build_pointer_cast( + ptr, + self.context.ptr_type(AddressSpace::default()), + "ptr_cast", + ).unwrap_or(ptr); + self.builder.build_store(dst, fv).ok(); + } + _ => {} + } + } + + // ── Global-variable constructor ──────────────────────────────────────── + + /// Emit a `void __rpg_global_init()` function that applies INZ initialisers + /// to module-level `DCL-S` variables, then register it in + /// `@llvm.global_ctors` so the loader runs it before `main`. + fn gen_global_init_fn(&mut self) -> Result<(), CodegenError> { + // Nothing to do if no globals need runtime initialisation. + if self.global_inits.is_empty() { + return Ok(()); + } + + let void_t = self.context.void_type(); + let init_ty = void_t.fn_type(&[], false); + let init_fn = self.module.add_function("__rpg_global_init", init_ty, None); + let bb = self.context.append_basic_block(init_fn, "entry"); + self.builder.position_at_end(bb); + + // We need a temporary FnState so we can call the expression helpers. + // Globals have no locals of their own, so the map is empty. + let mut state = FnState::new(init_fn); + + // Clone the list to avoid borrowing `self` while mutating through it. + let inits: Vec<(String, TypeSpec, Vec)> = self.global_inits.clone(); + + for (name, ty, keywords) in &inits { + // Retrieve the pointer we stored in `self.globals`. 
+ let ptr = match self.globals.get(name) { + Some((p, _)) => *p, + None => continue, + }; + + for kw in keywords { + match kw { + VarKeyword::InzExpr(expr) => { + let expr = expr.clone(); + self.init_var_from_expr(ptr, ty, &expr, &mut state)?; + } + VarKeyword::InzNamed(nc) => { + let nc = nc.clone(); + self.init_var_from_named(ptr, ty, &nc)?; + } + VarKeyword::Inz => { + self.zero_init_var(ptr, ty)?; + } + _ => {} + } + } + } + + self.builder.build_return(None).ok(); + + // Register in @llvm.global_ctors so the dynamic linker calls this + // function before main(). + // + // The ctors array element type is { i32, ptr, ptr }. + let i32_t = self.context.i32_type(); + let ptr_t = self.context.ptr_type(AddressSpace::default()); + let elem_ty = self.context.struct_type( + &[i32_t.into(), ptr_t.into(), ptr_t.into()], + false, + ); + let priority = i32_t.const_int(65535, false); + let fn_ptr = init_fn.as_global_value().as_pointer_value(); + let null_ptr = ptr_t.const_null(); + let ctor_elem = elem_ty.const_named_struct(&[ + priority.into(), + fn_ptr.into(), + null_ptr.into(), + ]); + let arr_ty = elem_ty.array_type(1); + let arr_val = elem_ty.const_array(&[ctor_elem]); + + let ctors = self.module.add_global(arr_ty, Some(AddressSpace::default()), "llvm.global_ctors"); + ctors.set_initializer(&arr_val); + // The `appending` linkage is required for global_ctors. + ctors.set_linkage(inkwell::module::Linkage::Appending); + + Ok(()) + } + + // ── C main() wrapper ──────────────────────────────────────────────────── + + fn gen_main_wrapper(&mut self, rpg_entry: &str) -> Result<(), CodegenError> { + let i32_t = self.context.i32_type(); + let main_ty = i32_t.fn_type(&[], false); + let main_fn = self.module.add_function("main", main_ty, None); + let bb = self.context.append_basic_block(main_fn, "entry"); + self.builder.position_at_end(bb); + + // Call the RPG entry procedure. 
+ let rpg_fn_name = format!("rpg_{}", rpg_entry); + if let Some(rpg_fn) = self.module.get_function(&rpg_fn_name) { + self.builder.build_call(rpg_fn, &[], "call_rpg").ok(); + } + + let zero = i32_t.const_zero(); + self.builder.build_return(Some(&zero)).ok(); + Ok(()) + } + + // ── Statement generation ──────────────────────────────────────────────── + + fn gen_statement(&mut self, stmt: &Statement, state: &mut FnState<'ctx>) -> Result<(), CodegenError> { + match stmt { + Statement::Return(r) => self.gen_return(r, state), + Statement::Leave => self.gen_leave(state), + Statement::Iter => self.gen_iter(state), + Statement::LeaveSr => self.gen_leave_sr(state), + Statement::Dsply(d) => self.gen_dsply(d, state), + Statement::Assign(a) => self.gen_assign(a, state), + Statement::If(i) => self.gen_if(i, state), + Statement::DoWhile(d) => self.gen_dow(d, state), + Statement::DoUntil(d) => self.gen_dou(d, state), + Statement::For(f) => self.gen_for(f, state), + Statement::Select(s) => self.gen_select(s, state), + Statement::Monitor(m) => self.gen_monitor(m, state), + Statement::CallP(c) => self.gen_callp(c, state), + Statement::ExSr(name) => self.gen_exsr(name, state), + Statement::Reset(_) | + Statement::Clear(_) | + Statement::Io(_) | + Statement::Unimplemented(_) => { + // Silently skip unimplemented statements. 
+ Ok(()) + } + } + } + + fn gen_return(&mut self, r: &ReturnStmt, state: &mut FnState<'ctx>) -> Result<(), CodegenError> { + let fn_ty = state.function.get_type(); + if fn_ty.get_return_type().is_none() { + // void function + self.builder.build_return(None).ok(); + } else if let Some(expr) = &r.value { + let val = self.gen_expression(expr, state)?; + self.builder.build_return(Some(&val)).ok(); + } else { + let zero = self.context.i64_type().const_zero(); + self.builder.build_return(Some(&zero)).ok(); + } + Ok(()) + } + + fn gen_leave(&mut self, state: &mut FnState<'ctx>) -> Result<(), CodegenError> { + if let Some(bb) = state.break_block { + self.builder.build_unconditional_branch(bb).ok(); + } + Ok(()) + } + + fn gen_iter(&mut self, state: &mut FnState<'ctx>) -> Result<(), CodegenError> { + if let Some(bb) = state.continue_block { + self.builder.build_unconditional_branch(bb).ok(); + } + Ok(()) + } + + fn gen_leave_sr(&mut self, _state: &mut FnState<'ctx>) -> Result<(), CodegenError> { + // LEAVESR exits the current subroutine — treat as return for now. + self.builder.build_return(None).ok(); + Ok(()) + } + + // ── DSPLY ────────────────────────────────────────────────────────────── + + fn gen_dsply(&mut self, d: &DsplyStmt, state: &mut FnState<'ctx>) -> Result<(), CodegenError> { + let dsply = self.module.get_function("rpg_dsply") + .ok_or_else(|| CodegenError::new("rpg_dsply not declared"))?; + + match &d.expr { + Expression::Variable(qname) => { + // Look up the variable, then pass ptr + len. 
+ let name = qname.leaf(); + if let Some((ptr, ty)) = self.resolve_var(name, state) { + let len = ty.byte_size().unwrap_or(0); + let len_val = self.context.i64_type().const_int(len, false); + self.builder.build_call(dsply, &[ptr.into(), len_val.into()], "dsply").ok(); + } + } + Expression::Literal(Literal::String(s)) => { + let ptr = self.intern_string(s); + let len_val = self.context.i64_type().const_int(s.len() as u64, false); + self.builder.build_call(dsply, &[ptr.into(), len_val.into()], "dsply").ok(); + } + other => { + // Evaluate as integer-like expression and display it. + if let Ok(val) = self.gen_expression(other, state) { + // For now just call dsply_cstr on an empty string as fallback. + let _ = val; + let empty = self.intern_string(""); + let zero = self.context.i64_type().const_zero(); + self.builder.build_call(dsply, &[empty.into(), zero.into()], "dsply").ok(); + } + } + } + Ok(()) + } + + // ── Assignment ───────────────────────────────────────────────────────── + + fn gen_assign(&mut self, a: &AssignStmt, state: &mut FnState<'ctx>) -> Result<(), CodegenError> { + let name = a.target.base_name(); + let (ptr, ty) = match self.resolve_var(name, state) { + Some(v) => v, + None => return Ok(()), // silently skip if var not found + }; + // Clone to avoid borrow issues. 
+ let ty = ty.clone(); + + match &ty { + TypeSpec::Char(size_expr) => { + if let Expression::Literal(Literal::String(s)) = &a.value { + let field_len = const_int_from_expr(size_expr).unwrap_or(s.len() as u64) as usize; + let mut padded = vec![b' '; field_len]; + let copy = s.len().min(field_len); + padded[..copy].copy_from_slice(&s.as_bytes()[..copy]); + let src = self.intern_bytes(&padded); + let memcpy = self.module.get_function("memcpy").unwrap(); + let len = self.context.i64_type().const_int(field_len as u64, false); + self.builder.build_call(memcpy, &[ptr.into(), src.into(), len.into()], "assign").ok(); + } + } + TypeSpec::Int(_) | TypeSpec::Uns(_) => { + let val = self.gen_expression(&a.value, state)?; + self.store_value(ptr, val, &ty); + } + _ => { + if let Ok(val) = self.gen_expression(&a.value, state) { + self.store_value(ptr, val, &ty); + } + } + } + Ok(()) + } + + // ── IF / ELSEIF / ELSE / ENDIF ───────────────────────────────────────── + + fn gen_if(&mut self, i: &IfStmt, state: &mut FnState<'ctx>) -> Result<(), CodegenError> { + let func = state.function; + let merge_bb = self.context.append_basic_block(func, "if_merge"); + + let next_bb = self.context.prepend_basic_block(merge_bb, "if_then"); + let else_bb = if i.elseifs.is_empty() && i.else_body.is_none() { + merge_bb + } else { + self.context.prepend_basic_block(merge_bb, "if_else_chain") + }; + + // Condition branch. + let cond = self.gen_bool_condition(&i.condition, state)?; + self.builder.build_conditional_branch(cond, next_bb, else_bb).ok(); + + // Then body. + self.builder.position_at_end(next_bb); + for s in &i.then_body { + self.gen_statement(s, state)?; + if self.current_block_is_terminated() { break; } + } + if !self.current_block_is_terminated() { + self.builder.build_unconditional_branch(merge_bb).ok(); + } + + // ElseIf chain. 
+ let mut current_else = else_bb; + for (idx, elseif) in i.elseifs.iter().enumerate() { + self.builder.position_at_end(current_else); + let then_bb = self.context.append_basic_block(func, &format!("elseif_then_{}", idx)); + let next_else_bb = if idx + 1 < i.elseifs.len() || i.else_body.is_some() { + self.context.append_basic_block(func, &format!("elseif_else_{}", idx)) + } else { + merge_bb + }; + let cond = self.gen_bool_condition(&elseif.condition, state)?; + self.builder.build_conditional_branch(cond, then_bb, next_else_bb).ok(); + + self.builder.position_at_end(then_bb); + for s in &elseif.body { + self.gen_statement(s, state)?; + if self.current_block_is_terminated() { break; } + } + if !self.current_block_is_terminated() { + self.builder.build_unconditional_branch(merge_bb).ok(); + } + current_else = next_else_bb; + } + + // Else body. + if let Some(else_body) = &i.else_body { + self.builder.position_at_end(current_else); + for s in else_body { + self.gen_statement(s, state)?; + if self.current_block_is_terminated() { break; } + } + if !self.current_block_is_terminated() { + self.builder.build_unconditional_branch(merge_bb).ok(); + } + } + + self.builder.position_at_end(merge_bb); + Ok(()) + } + + // ── DOW loop ─────────────────────────────────────────────────────────── + + fn gen_dow(&mut self, d: &DoWhileStmt, state: &mut FnState<'ctx>) -> Result<(), CodegenError> { + let func = state.function; + let cond_bb = self.context.append_basic_block(func, "dow_cond"); + let body_bb = self.context.append_basic_block(func, "dow_body"); + let after_bb = self.context.append_basic_block(func, "dow_after"); + + self.builder.build_unconditional_branch(cond_bb).ok(); + + // Condition. + self.builder.position_at_end(cond_bb); + let cond = self.gen_bool_condition(&d.condition, state)?; + self.builder.build_conditional_branch(cond, body_bb, after_bb).ok(); + + // Body. 
+ self.builder.position_at_end(body_bb); + let saved_break = state.break_block.replace(after_bb); + let saved_continue = state.continue_block.replace(cond_bb); + for s in &d.body { + self.gen_statement(s, state)?; + if self.current_block_is_terminated() { break; } + } + state.break_block = saved_break; + state.continue_block = saved_continue; + if !self.current_block_is_terminated() { + self.builder.build_unconditional_branch(cond_bb).ok(); + } + + self.builder.position_at_end(after_bb); + Ok(()) + } + + // ── DOU loop ─────────────────────────────────────────────────────────── + + fn gen_dou(&mut self, d: &DoUntilStmt, state: &mut FnState<'ctx>) -> Result<(), CodegenError> { + let func = state.function; + let body_bb = self.context.append_basic_block(func, "dou_body"); + let cond_bb = self.context.append_basic_block(func, "dou_cond"); + let after_bb = self.context.append_basic_block(func, "dou_after"); + + self.builder.build_unconditional_branch(body_bb).ok(); + + // Body. + self.builder.position_at_end(body_bb); + let saved_break = state.break_block.replace(after_bb); + let saved_continue = state.continue_block.replace(cond_bb); + for s in &d.body { + self.gen_statement(s, state)?; + if self.current_block_is_terminated() { break; } + } + state.break_block = saved_break; + state.continue_block = saved_continue; + if !self.current_block_is_terminated() { + self.builder.build_unconditional_branch(cond_bb).ok(); + } + + // Condition — branch back to body if NOT met. + self.builder.position_at_end(cond_bb); + let cond = self.gen_bool_condition(&d.condition, state)?; + self.builder.build_conditional_branch(cond, after_bb, body_bb).ok(); + + self.builder.position_at_end(after_bb); + Ok(()) + } + + // ── FOR loop ─────────────────────────────────────────────────────────── + + fn gen_for(&mut self, f: &ForStmt, state: &mut FnState<'ctx>) -> Result<(), CodegenError> { + let func = state.function; + let i64_t = self.context.i64_type(); + + // Allocate loop variable. 
+ let loop_var = self.builder.build_alloca(i64_t, &f.var).unwrap(); + let start = self.gen_expression(&f.start, state)?; + let start_i = self.coerce_to_i64(start); + self.builder.build_store(loop_var, start_i).ok(); + state.locals.insert(f.var.clone(), (loop_var, TypeSpec::Int(Box::new(Expression::Literal(Literal::Integer(10)))))); + + let cond_bb = self.context.append_basic_block(func, "for_cond"); + let body_bb = self.context.append_basic_block(func, "for_body"); + let incr_bb = self.context.append_basic_block(func, "for_incr"); + let after_bb = self.context.append_basic_block(func, "for_after"); + + self.builder.build_unconditional_branch(cond_bb).ok(); + + // Condition. + self.builder.position_at_end(cond_bb); + let cur = self.builder.build_load(i64_t, loop_var, "cur").unwrap(); + let cur_i = cur.into_int_value(); + let limit = self.gen_expression(&f.limit, state)?; + let limit_i = self.coerce_to_i64(limit); + let pred = if f.downto { IntPredicate::SGE } else { IntPredicate::SLE }; + let cond = self.builder.build_int_compare(pred, cur_i, limit_i, "for_cond").unwrap(); + self.builder.build_conditional_branch(cond, body_bb, after_bb).ok(); + + // Body. + self.builder.position_at_end(body_bb); + let saved_break = state.break_block.replace(after_bb); + let saved_continue = state.continue_block.replace(incr_bb); + for s in &f.body { + self.gen_statement(s, state)?; + if self.current_block_is_terminated() { break; } + } + state.break_block = saved_break; + state.continue_block = saved_continue; + if !self.current_block_is_terminated() { + self.builder.build_unconditional_branch(incr_bb).ok(); + } + + // Increment / decrement. 
+ self.builder.position_at_end(incr_bb); + let step = if let Some(step_expr) = &f.step { + let sv = self.gen_expression(step_expr, state)?; + self.coerce_to_i64(sv) + } else { + i64_t.const_int(1, false) + }; + let cur2 = self.builder.build_load(i64_t, loop_var, "cur2").unwrap().into_int_value(); + let next = if f.downto { + self.builder.build_int_sub(cur2, step, "dec").unwrap() + } else { + self.builder.build_int_add(cur2, step, "inc").unwrap() + }; + self.builder.build_store(loop_var, next).ok(); + self.builder.build_unconditional_branch(cond_bb).ok(); + + self.builder.position_at_end(after_bb); + Ok(()) + } + + // ── SELECT / WHEN / OTHER ────────────────────────────────────────────── + + fn gen_select(&mut self, s: &SelectStmt, state: &mut FnState<'ctx>) -> Result<(), CodegenError> { + let func = state.function; + let merge_bb = self.context.append_basic_block(func, "select_merge"); + + for (i, when) in s.whens.iter().enumerate() { + let then_bb = self.context.prepend_basic_block(merge_bb, &format!("when_{}", i)); + let next_bb = if i + 1 < s.whens.len() || s.other.is_some() { + self.context.prepend_basic_block(then_bb, &format!("when_check_{}", i + 1)) + } else { + merge_bb + }; + + let cond = self.gen_bool_condition(&when.condition, state)?; + self.builder.build_conditional_branch(cond, then_bb, next_bb).ok(); + + self.builder.position_at_end(then_bb); + for st in &when.body { + self.gen_statement(st, state)?; + if self.current_block_is_terminated() { break; } + } + if !self.current_block_is_terminated() { + self.builder.build_unconditional_branch(merge_bb).ok(); + } + + self.builder.position_at_end(next_bb); + } + + if let Some(other) = &s.other { + for st in other { + self.gen_statement(st, state)?; + if self.current_block_is_terminated() { break; } + } + if !self.current_block_is_terminated() { + self.builder.build_unconditional_branch(merge_bb).ok(); + } + self.builder.position_at_end(merge_bb); + } + + Ok(()) + } + + // ── MONITOR / ON-ERROR 
───────────────────────────────────────────────── + + fn gen_monitor(&mut self, m: &MonitorStmt, state: &mut FnState<'ctx>) -> Result<(), CodegenError> { + // Simplified: just generate the body, ignore ON-ERROR (no exceptions). + for s in &m.body { + self.gen_statement(s, state)?; + if self.current_block_is_terminated() { break; } + } + Ok(()) + } + + // ── CALLP ────────────────────────────────────────────────────────────── + + fn gen_callp(&mut self, c: &CallPStmt, state: &mut FnState<'ctx>) -> Result<(), CodegenError> { + // Look for a function with this name or rpg_. + let callee = self.module.get_function(&c.name) + .or_else(|| self.module.get_function(&format!("rpg_{}", c.name))); + + if let Some(callee) = callee { + let mut args: Vec = Vec::new(); + for arg in &c.args { + match arg { + Arg::Expr(e) => { + if let Ok(v) = self.gen_expression(e, state) { + args.push(v.into()); + } + } + Arg::Omit => {} + } + } + self.builder.build_call(callee, &args, "callp").ok(); + } + Ok(()) + } + + // ── EXSR ─────────────────────────────────────────────────────────────── + + fn gen_exsr(&mut self, name: &str, state: &mut FnState<'ctx>) -> Result<(), CodegenError> { + // Subroutines aren't first-class functions in this impl. + // Look for an inline subroutine function generated from the AST. 
+ let callee = self.module.get_function(name) + .or_else(|| self.module.get_function(&format!("rpg_{}", name))); + if let Some(callee) = callee { + self.builder.build_call(callee, &[], "exsr").ok(); + } + let _ = state; + Ok(()) + } + + // ── Expression generation ─────────────────────────────────────────────── + + fn gen_expression(&mut self, expr: &Expression, state: &mut FnState<'ctx>) -> Result, CodegenError> { + let i64_t = self.context.i64_type(); + match expr { + Expression::Literal(lit) => self.gen_literal(lit), + + Expression::Named(nc) => { + let v = match nc { + NamedConstant::On => i64_t.const_int(1, false), + NamedConstant::Off => i64_t.const_int(0, false), + NamedConstant::Zero | NamedConstant::Zeros => i64_t.const_zero(), + _ => i64_t.const_zero(), + }; + Ok(v.into()) + } + + Expression::Variable(qname) => { + let name = qname.leaf(); + if let Some((ptr, ty)) = self.resolve_var(name, state) { + let llvm_ty = self.type_spec_to_llvm(&ty) + .unwrap_or(BasicTypeEnum::IntType(i64_t)); + match &ty { + TypeSpec::Int(w) | TypeSpec::Uns(w) => { + let width = const_int_from_expr(w).unwrap_or(8); + let int_ty = self.context.custom_width_int_type((width * 8) as u32); + if let Ok(v) = self.builder.build_load(int_ty, ptr, name) { + let iv = v.into_int_value(); + let ext = self.builder.build_int_s_extend(iv, i64_t, "sext").unwrap_or(iv); + return Ok(ext.into()); + } + } + _ => { + // For CHAR / other types, return the pointer itself. + return Ok(ptr.into()); + } + } + let _ = llvm_ty; + Err(CodegenError::new(format!("could not load variable '{}'", name))) + } else { + // Return 0 for unknown variables. 
+ Ok(i64_t.const_zero().into()) + } + } + + Expression::BinOp(op, lhs, rhs) => { + self.gen_binop(op, lhs, rhs, state) + } + + Expression::UnaryMinus(e) => { + let v = self.gen_expression(e, state)?; + let iv = self.coerce_to_i64(v); + let neg = self.builder.build_int_neg(iv, "neg").unwrap(); + Ok(neg.into()) + } + + Expression::UnaryPlus(e) => { + self.gen_expression(e, state) + } + + Expression::Not(e) => { + let v = self.gen_expression(e, state)?; + let iv = self.coerce_to_i64(v); + let zero = i64_t.const_zero(); + let cmp = self.builder.build_int_compare(IntPredicate::EQ, iv, zero, "not").unwrap(); + let ext = self.builder.build_int_z_extend(cmp, i64_t, "zext").unwrap(); + Ok(ext.into()) + } + + Expression::Paren(e) => self.gen_expression(e, state), + + Expression::Call(name, args) => { + // Treat call-as-expression similarly to CALLP. + let callee = self.module.get_function(name) + .or_else(|| self.module.get_function(&format!("rpg_{}", name))); + if let Some(callee) = callee { + let mut cargs: Vec = Vec::new(); + for arg in args { + if let Arg::Expr(e) = arg { + if let Ok(v) = self.gen_expression(e, state) { + cargs.push(v.into()); + } + } + } + let call = self.builder.build_call(callee, &cargs, "call").unwrap(); + match call.try_as_basic_value() { + inkwell::values::ValueKind::Basic(v) => return Ok(v), + inkwell::values::ValueKind::Instruction(_) => {} + } + } + Ok(i64_t.const_zero().into()) + } + + Expression::BuiltIn(bif) => self.gen_builtin(bif, state), + + Expression::Special(_) | Expression::Index(_, _) => { + Ok(i64_t.const_zero().into()) + } + } + } + + fn gen_literal(&mut self, lit: &Literal) -> Result, CodegenError> { + let i64_t = self.context.i64_type(); + match lit { + Literal::Integer(n) => { + Ok(i64_t.const_int(*n as u64, *n < 0).into()) + } + Literal::Float(f) => { + let f64_t = self.context.f64_type(); + Ok(f64_t.const_float(*f).into()) + } + Literal::String(s) => { + let ptr = self.intern_string(s); + Ok(ptr.into()) + } + 
Literal::Hex(bytes) => { + let ptr = self.intern_bytes(bytes); + Ok(ptr.into()) + } + Literal::Indicator(b) => { + let v = i64_t.const_int(if *b { 1 } else { 0 }, false); + Ok(v.into()) + } + } + } + + fn gen_binop( + &mut self, + op: &BinOp, + lhs: &Expression, + rhs: &Expression, + state: &mut FnState<'ctx>, + ) -> Result, CodegenError> { + let i64_t = self.context.i64_type(); + + let lv = self.gen_expression(lhs, state)?; + let rv = self.gen_expression(rhs, state)?; + + match op { + BinOp::Add => { + let l = self.coerce_to_i64(lv); + let r = self.coerce_to_i64(rv); + Ok(self.builder.build_int_add(l, r, "add").unwrap().into()) + } + BinOp::Sub => { + let l = self.coerce_to_i64(lv); + let r = self.coerce_to_i64(rv); + Ok(self.builder.build_int_sub(l, r, "sub").unwrap().into()) + } + BinOp::Mul => { + let l = self.coerce_to_i64(lv); + let r = self.coerce_to_i64(rv); + Ok(self.builder.build_int_mul(l, r, "mul").unwrap().into()) + } + BinOp::Div => { + let l = self.coerce_to_i64(lv); + let r = self.coerce_to_i64(rv); + Ok(self.builder.build_int_signed_div(l, r, "div").unwrap().into()) + } + BinOp::Pow => { + // No native pow for integers — use 1 as fallback. 
+ let _ = (lv, rv); + Ok(i64_t.const_int(1, false).into()) + } + BinOp::Eq => { + let cmp = self.cmp_values(lv, rv, IntPredicate::EQ)?; + Ok(self.builder.build_int_z_extend(cmp, i64_t, "bool").unwrap().into()) + } + BinOp::Ne => { + let cmp = self.cmp_values(lv, rv, IntPredicate::NE)?; + Ok(self.builder.build_int_z_extend(cmp, i64_t, "bool").unwrap().into()) + } + BinOp::Lt => { + let cmp = self.cmp_values(lv, rv, IntPredicate::SLT)?; + Ok(self.builder.build_int_z_extend(cmp, i64_t, "bool").unwrap().into()) + } + BinOp::Le => { + let cmp = self.cmp_values(lv, rv, IntPredicate::SLE)?; + Ok(self.builder.build_int_z_extend(cmp, i64_t, "bool").unwrap().into()) + } + BinOp::Gt => { + let cmp = self.cmp_values(lv, rv, IntPredicate::SGT)?; + Ok(self.builder.build_int_z_extend(cmp, i64_t, "bool").unwrap().into()) + } + BinOp::Ge => { + let cmp = self.cmp_values(lv, rv, IntPredicate::SGE)?; + Ok(self.builder.build_int_z_extend(cmp, i64_t, "bool").unwrap().into()) + } + BinOp::And => { + let l = self.coerce_to_i64(lv); + let r = self.coerce_to_i64(rv); + Ok(self.builder.build_and(l, r, "and").unwrap().into()) + } + BinOp::Or => { + let l = self.coerce_to_i64(lv); + let r = self.coerce_to_i64(rv); + Ok(self.builder.build_or(l, r, "or").unwrap().into()) + } + } + } + + fn gen_builtin(&mut self, bif: &BuiltIn, state: &mut FnState<'ctx>) -> Result, CodegenError> { + let i64_t = self.context.i64_type(); + match bif { + BuiltIn::Len(e) => { + // %LEN(field) — return compile-time field length. + if let Expression::Variable(qname) = e.as_ref() { + let name = qname.leaf(); + if let Some((_, ty)) = self.resolve_var(name, state) { + let len = ty.byte_size().unwrap_or(0); + return Ok(i64_t.const_int(len, false).into()); + } + } + Ok(i64_t.const_zero().into()) + } + BuiltIn::Trim(e) | BuiltIn::TrimL(e) | BuiltIn::TrimR(e) => { + // Return the pointer unchanged — runtime trim is not implemented. 
+ self.gen_expression(e, state) + } + BuiltIn::Abs(e) => { + let v = self.gen_expression(e, state)?; + let iv = self.coerce_to_i64(v); + let zero = i64_t.const_zero(); + let neg = self.builder.build_int_neg(iv, "neg").unwrap(); + let cmp = self.builder.build_int_compare(IntPredicate::SGE, iv, zero, "ge0").unwrap(); + let abs = self.builder.build_select(cmp, iv, neg, "abs").unwrap(); + Ok(abs.into()) + } + BuiltIn::Int(e) | BuiltIn::Char(e) => { + self.gen_expression(e, state) + } + BuiltIn::Sqrt(e) => { + let v = self.gen_expression(e, state)?; + let f64_t = self.context.f64_type(); + let fv = match v { + BasicValueEnum::IntValue(iv) => { + self.builder.build_signed_int_to_float(iv, f64_t, "i2f").unwrap() + } + BasicValueEnum::FloatValue(fv) => fv, + _ => f64_t.const_float(0.0), + }; + // Declare llvm.sqrt.f64 intrinsic. + let sqrt_ty = f64_t.fn_type(&[f64_t.into()], false); + let sqrt_fn = self.module.get_function("llvm.sqrt.f64") + .unwrap_or_else(|| self.module.add_function("llvm.sqrt.f64", sqrt_ty, None)); + let call_result = self.builder.build_call(sqrt_fn, &[fv.into()], "sqrt").unwrap(); + match call_result.try_as_basic_value() { + inkwell::values::ValueKind::Basic(v) => Ok(v), + inkwell::values::ValueKind::Instruction(_) => Ok(fv.into()), + } + } + BuiltIn::Eof(_) | BuiltIn::Found(_) | BuiltIn::Error => { + Ok(i64_t.const_zero().into()) + } + BuiltIn::Rem(a, b) => { + let av = self.gen_expression(a, state)?; + let bv = self.gen_expression(b, state)?; + let ai = self.coerce_to_i64(av); + let bi = self.coerce_to_i64(bv); + Ok(self.builder.build_int_signed_rem(ai, bi, "rem").unwrap().into()) + } + BuiltIn::Div(a, b) => { + let av = self.gen_expression(a, state)?; + let bv = self.gen_expression(b, state)?; + let ai = self.coerce_to_i64(av); + let bi = self.coerce_to_i64(bv); + Ok(self.builder.build_int_signed_div(ai, bi, "div").unwrap().into()) + } + BuiltIn::Size(e) => { + if let Expression::Variable(qname) = e.as_ref() { + let name = qname.leaf(); + if let 
Some((_, ty)) = self.resolve_var(name, state) { + let sz = ty.byte_size().unwrap_or(0); + return Ok(i64_t.const_int(sz, false).into()); + } + } + Ok(i64_t.const_zero().into()) + } + BuiltIn::Subst(s, start, len) => { + // Return pointer into the string at start-1 (RPG is 1-based). + let sp = self.gen_expression(s, state)?; + let st = self.gen_expression(start, state)?; + let sti = self.coerce_to_i64(st); + let one = i64_t.const_int(1, false); + let off = self.builder.build_int_sub(sti, one, "off").unwrap(); + if let BasicValueEnum::PointerValue(ptr) = sp { + let elem_ptr = unsafe { + self.builder.build_gep( + self.context.i8_type(), + ptr, + &[off], + "subst_ptr", + ).unwrap() + }; + let _ = len; + return Ok(elem_ptr.into()); + } + Ok(sp) + } + BuiltIn::Other(_, args) => { + // Try to evaluate first argument as a pass-through. + if let Some(first) = args.first() { + return self.gen_expression(first, state); + } + Ok(i64_t.const_zero().into()) + } + _ => Ok(i64_t.const_zero().into()), + } + } + + // ── Bool condition helper ────────────────────────────────────────────── + + fn gen_bool_condition( + &mut self, + expr: &Expression, + state: &mut FnState<'ctx>, + ) -> Result, CodegenError> { + let i64_t = self.context.i64_type(); + let i1_t = self.context.bool_type(); + let val = self.gen_expression(expr, state)?; + match val { + BasicValueEnum::IntValue(iv) => { + if iv.get_type() == i1_t { + return Ok(iv); + } + let zero = i64_t.const_zero(); + let ext = self.builder.build_int_z_extend(iv, i64_t, "ext").unwrap_or(iv); + Ok(self.builder.build_int_compare(IntPredicate::NE, ext, zero, "bool_cond").unwrap()) + } + _ => { + // Treat non-integer as "true". 
+ Ok(i1_t.const_int(1, false)) + } + } + } + + fn cmp_values( + &self, + lv: BasicValueEnum<'ctx>, + rv: BasicValueEnum<'ctx>, + pred: IntPredicate, + ) -> Result, CodegenError> { + let i64_t = self.context.i64_type(); + let l = self.coerce_to_i64(lv); + let r = self.coerce_to_i64(rv); + Ok(self.builder.build_int_compare(pred, l, r, "cmp").unwrap_or_else(|_| i64_t.const_zero())) + } + + fn coerce_to_i64(&self, val: BasicValueEnum<'ctx>) -> inkwell::values::IntValue<'ctx> { + let i64_t = self.context.i64_type(); + match val { + BasicValueEnum::IntValue(iv) => { + let bits = iv.get_type().get_bit_width(); + if bits < 64 { + self.builder.build_int_s_extend(iv, i64_t, "sext").unwrap_or(iv) + } else if bits > 64 { + self.builder.build_int_truncate(iv, i64_t, "trunc").unwrap_or(iv) + } else { + iv + } + } + BasicValueEnum::FloatValue(fv) => { + self.builder.build_float_to_signed_int(fv, i64_t, "f2i").unwrap_or_else(|_| i64_t.const_zero()) + } + _ => i64_t.const_zero(), + } + } + + // ── String / byte interning ───────────────────────────────────────────── + + fn intern_string(&mut self, s: &str) -> PointerValue<'ctx> { + self.intern_bytes(s.as_bytes()) + } + + fn intern_bytes(&mut self, bytes: &[u8]) -> PointerValue<'ctx> { + // Build a stable key from the bytes. + let key = format!("{:?}", bytes); + if let Some(&ptr) = self.string_cache.get(&key) { + return ptr; + } + + // Create a null-terminated global constant. + let i8_t = self.context.i8_type(); + let mut data = bytes.to_vec(); + data.push(0); // null terminator + let arr_vals: Vec<_> = data.iter() + .map(|&b| i8_t.const_int(b as u64, false)) + .collect(); + let arr_ty = i8_t.array_type(data.len() as u32); + let arr_val = i8_t.const_array(&arr_vals); + + let global = self.module.add_global(arr_ty, Some(AddressSpace::default()), ".str"); + global.set_initializer(&arr_val); + global.set_constant(true); + global.set_unnamed_addr(true); + // We keep it as a global pointer; the GEP to get i8* is done when needed. 
+ let ptr = global.as_pointer_value(); + self.string_cache.insert(key, ptr); + ptr + } + + // ── Type helpers ─────────────────────────────────────────────────────── + + fn type_spec_to_llvm(&self, ty: &TypeSpec) -> Option> { + let i8_t = self.context.i8_type(); + let i64_t = self.context.i64_type(); + match ty { + TypeSpec::Char(e) | TypeSpec::VarChar(e) => { + let n = const_int_from_expr(e).unwrap_or(1) as u32; + Some(i8_t.array_type(n).into()) + } + TypeSpec::Int(e) | TypeSpec::Uns(e) => { + let bytes = const_int_from_expr(e).unwrap_or(8); + let bits = match bytes { + 3 => 8, + 5 => 16, + 10 => 32, + 20 => 64, + _ => 64, + }; + Some(self.context.custom_width_int_type(bits).into()) + } + TypeSpec::Float(_) => { + Some(self.context.f64_type().into()) + } + TypeSpec::Ind => { + Some(self.context.bool_type().into()) + } + TypeSpec::Pointer => { + Some(self.context.ptr_type(AddressSpace::default()).into()) + } + TypeSpec::Packed(digits, _) | TypeSpec::Zoned(digits, _) => { + // Store as i64 — a real implementation would use BCD. + let _ = digits; + Some(i64_t.into()) + } + _ => None, + } + } + + // ── Variable lookup ──────────────────────────────────────────────────── + + fn resolve_var( + &self, + name: &str, + state: &FnState<'ctx>, + ) -> Option<(PointerValue<'ctx>, TypeSpec)> { + if let Some((ptr, ty)) = state.locals.get(name) { + return Some((*ptr, ty.clone())); + } + if let Some((ptr, ty)) = self.globals.get(name) { + return Some((*ptr, ty.clone())); + } + None + } + + // ── Utility ──────────────────────────────────────────────────────────── + + fn current_block_is_terminated(&self) -> bool { + self.builder + .get_insert_block() + .and_then(|bb| bb.get_terminator()) + .is_some() + } +} + +// ───────────────────────────────────────────────────────────────────────────── +// Free helpers +// ───────────────────────────────────────────────────────────────────────────── + +/// Extract a constant integer from a simple `Expression::Literal(Integer(n))`. 
+fn const_int_from_expr(expr: &Expression) -> Option { + if let Expression::Literal(Literal::Integer(n)) = expr { + Some(*n as u64) + } else { + None + } +} + +// ───────────────────────────────────────────────────────────────────────────── +// Unit tests +// ───────────────────────────────────────────────────────────────────────────── + +#[cfg(test)] +mod tests { + use super::*; + use crate::lower::lower; + + fn get_ir(src: &str) -> String { + let prog = lower(src).expect("lower"); + emit_ir(&prog).expect("emit_ir") + } + + #[test] + fn ir_contains_main_wrapper() { + let src = "DCL-PROC main EXPORT;\n RETURN;\nEND-PROC;"; + let ir = get_ir(src); + assert!(ir.contains("define") && (ir.contains("@main") || ir.contains("@rpg_main")), + "IR should define main:\n{}", ir); + } + + #[test] + fn ir_declares_rpg_dsply() { + let src = "DCL-PROC main EXPORT;\n RETURN;\nEND-PROC;"; + let ir = get_ir(src); + assert!(ir.contains("rpg_dsply"), "IR should declare rpg_dsply:\n{}", ir); + } + + #[test] + fn ir_hello_world() { + let src = include_str!("../hello.rpg"); + let prog = lower(src).expect("lower hello.rpg"); + let ir = emit_ir(&prog).expect("emit_ir hello.rpg"); + // The IR must contain the dsply call and both main functions. 
+ assert!(ir.contains("rpg_dsply") || ir.contains("rpg_dsply_cstr"), + "IR should call rpg_dsply:\n{}", &ir[..ir.len().min(2000)]); + } + + #[test] + fn ir_for_loop() { + let src = r#" +DCL-PROC looper EXPORT; + DCL-S i INT(10); + FOR i = 1 TO 10; + i = i + 1; + ENDFOR; + RETURN; +END-PROC; +"#; + let ir = get_ir(src); + assert!(ir.contains("for_cond") || ir.contains("br"), "FOR loop should emit branches:\n{}", ir); + } + + #[test] + fn ir_if_stmt() { + let src = r#" +DCL-PROC check EXPORT; + DCL-S x INT(10) INZ(5); + IF x = 5; + RETURN; + ENDIF; + RETURN; +END-PROC; +"#; + let ir = get_ir(src); + assert!(ir.contains("if_then") || ir.contains("br"), "IF should emit branches:\n{}", ir); + } +} diff --git a/src/lib.rs b/src/lib.rs index 70b1fd2..93b6327 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -2,6 +2,13 @@ //! //! Loads the BNF grammar embedded at compile time, builds a [`bnf::GrammarParser`], //! and exposes helpers used by both the compiler binary and the demo binary. +//! +//! Also provides the typed AST ([`ast`]), BNF-to-AST lowering pass ([`lower`]), +//! and LLVM code-generator ([`codegen`]) used by the compiler pipeline. + +pub mod ast; +pub mod lower; +pub mod codegen; use bnf::{Grammar, Term}; diff --git a/src/lower.rs b/src/lower.rs new file mode 100644 index 0000000..cc0841b --- /dev/null +++ b/src/lower.rs @@ -0,0 +1,2758 @@ +//! lower.rs — Tokenizer and recursive-descent lowering pass. +//! +//! Converts validated RPG IV free-format source text into the typed [`Program`] +//! AST defined in `ast.rs`. +//! +//! The BNF parser (`bnf` crate) is only used to *validate* the source; the +//! lowering pass is a hand-written recursive descent parser that produces the +//! richer typed tree needed by the LLVM code generator. +//! +//! ## Supported subset +//! +//! The pass fully lowers the constructs needed to compile `hello.rpg` and +//! programs of similar complexity: +//! +//! * `CTL-OPT` control specs +//! 
* `DCL-S` / `DCL-C` / `DCL-DS` / `DCL-F` declarations +//! * `DCL-PROC … END-PROC` procedures with `DCL-PI … END-PI` +//! * `BEG-SR … END-SR` subroutines +//! * All expression forms (literals, arithmetic, logical, comparisons, BIFs) +//! * `DSPLY`, `RETURN`, `LEAVE`, `ITER`, `LEAVESR`, `EXSR`, `CLEAR`, `RESET` +//! * `IF/ELSEIF/ELSE/ENDIF`, `DOW/ENDDO`, `DOU/ENDDO`, `FOR/ENDFOR` +//! * `SELECT/WHEN/OTHER/ENDSL`, `MONITOR/ON-ERROR/ENDMON` +//! * `CALLP` and bare procedure calls +//! * All I/O statements (kept as `Statement::Io`) +//! * Assignment (`lvalue = expr` / `EVAL lvalue = expr`) +//! +//! Constructs outside this subset produce `Statement::Unimplemented` or +//! `Declaration` placeholder variants rather than hard errors, so the +//! compiler can still lower the parts it understands. + +use crate::ast::*; + +// ───────────────────────────────────────────────────────────────────────────── +// Public entry point +// ───────────────────────────────────────────────────────────────────────────── + +/// Lower `source` into a typed [`Program`]. +/// +/// Returns `Err` only if the tokenizer fails completely. Individual +/// unrecognised constructs are silently kept as `Unimplemented` nodes. 
+pub fn lower(source: &str) -> Result { + let tokens = tokenize(source)?; + let mut parser = Parser::new(tokens); + let program = parser.parse_program()?; + Ok(program) +} + +// ───────────────────────────────────────────────────────────────────────────── +// Error type +// ───────────────────────────────────────────────────────────────────────────── + +#[derive(Debug)] +pub struct LowerError { + pub message: String, +} + +impl std::fmt::Display for LowerError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "lower error: {}", self.message) + } +} + +impl std::error::Error for LowerError {} + +impl LowerError { + fn new(msg: impl Into) -> Self { + LowerError { message: msg.into() } + } +} + +// ───────────────────────────────────────────────────────────────────────────── +// Tokens +// ───────────────────────────────────────────────────────────────────────────── + +#[derive(Debug, Clone, PartialEq)] +enum Token { + // Keywords / compound keywords + KwCtlOpt, + KwDclS, + KwDclC, + KwDclDs, + KwEndDs, + KwDclF, + KwDclProc, + KwEndProc, + KwDclPi, + KwEndPi, + KwBegSr, + KwEndSr, + + // Type specifiers + KwChar, + KwVarChar, + KwGraph, + KwVarGraph, + KwUcs2, + KwVarUcs2, + KwInt, + KwUns, + KwFloat, + KwPacked, + KwZoned, + KwBindec, + KwInd, + KwDate, + KwTime, + KwTimestamp, + KwPointer, + KwProcPtr, + KwLike, + KwLikeDs, + KwLikeRec, + KwObject, + + // Variable keywords + KwInz, + KwConst, + KwValue, + KwStatic, + KwBased, + KwDim, + KwAscend, + KwDescend, + KwAltSeq, + KwOpDesc, + KwNoOpt, + KwVolatile, + KwOptions, + + // Proc keywords + KwExport, + KwExtProc, + KwDclCase, + + // Control option keywords + KwNoMain, + KwMain, + KwDftActGrp, + KwActGrp, + KwOption, + KwDatFmt, + KwTimFmt, + KwDecEdit, + KwAlwNull, + KwDebug, + KwExpOpts, + KwBndDir, + KwCopyright, + KwStgMdl, + KwTruncNbr, + KwText, + + // DS keywords + KwQualified, + KwTemplate, + KwExtName, + KwLikeRec2, + + // Statements + KwEval, + KwEvalR, + KwEvalCorr, + 
KwIf, + KwElseIf, + KwElse, + KwEndIf, + KwDow, + KwDou, + KwEndDo, + KwFor, + KwTo, + KwDownTo, + KwBy, + KwEndFor, + KwSelect, + KwWhen, + KwOther, + KwEndSl, + KwMonitor, + KwOnError, + KwEndMon, + KwReturn, + KwLeave, + KwIter, + KwLeaveSr, + KwExSr, + KwCallP, + KwDsply, + KwReset, + KwClear, + KwSortA, + KwDump, + KwForce, + KwPost, + KwFeod, + KwUnlock, + KwDeAlloc, + + // I/O statements + KwRead, + KwReadP, + KwReadE, + KwReadPE, + KwWrite, + KwUpdate, + KwDelete, + KwChain, + KwSetLL, + KwSetGT, + KwOpen, + KwClose, + KwExcept, + KwExFmt, + KwIn, + KwOut, + KwUnlockFile, + KwCommit, + KwRollback, + + // Named constants / special values + KwOn, // *ON + KwOff, // *OFF + KwBlank, // *BLANK + KwBlanks, // *BLANKS + KwZero, // *ZERO + KwZeros, // *ZEROS + KwHiVal, // *HIVAL + KwLoVal, // *LOVAL + KwNull, // *NULL + KwYes, // *YES + KwNo, // *NO + KwAll, // *ALL + KwStart, // *START + KwEnd, // *END + KwProgram, // *PROGRAM + KwFile, // *FILE (in ON-ERROR) + KwOmit, // *OMIT + KwThis, // *THIS + KwSame, // *SAME + KwIn2, // *IN + KwCaller, // *CALLER + KwNew, // *NEW + KwNoPass, // *NOPASS + KwVarSize, // *VARSIZE + KwUsrCtl, // *USRCTL + KwInputOnly,// *INPUTONLY + KwInputOpt, // *INPUT (usage) + KwOutputOpt,// *OUTPUT (usage) + KwUpdateOpt,// *UPDATE + KwDeleteOpt,// *DELETE + KwKey, // *KEY + KwSrcStmt, // *SRCSTMT + KwNodeBugIo,// *NODEBUGIO + KwNoUnRef, // *NOUNREF + KwNoShowCpy,// *NOSHOWCPY + KwResDecPos,// *RESDECPOS + KwSnglLvl, // *SNGLVL + KwTeraSpace,// *TERASPACE + KwInherit, // *INHERIT + KwExtDft, // *EXTDFT + KwCl, // *CL + + // Date/time formats + KwMdy, KwDmy, KwYmd, KwJul, + KwIso, KwUsa, KwEur, KwJis, + KwHms, + + // Duration codes + KwYears, KwMonths, KwDays, + KwHours, KwMinutes, KwSeconds, KwMSeconds, + + // Logical operators (keyword form) + KwAnd, + KwOr, + KwNot, + + // Built-in function names + BifAbs, + BifAddr, + BifAlloc, + BifBitAnd, + BifBitNot, + BifBitOr, + BifBitXor, + BifChar, + BifCheck, + BifCheckR, + BifDate, + BifDays, + 
// (continuation of the `Token` enum — its opening lies above this chunk)
    BifDec,
    BifDecH,
    BifDecPos,
    BifDiff,
    BifDiv,
    BifEditC,
    BifEditFlt,
    BifEditW,
    BifElem,
    BifEof,
    BifEqual,
    BifError,
    BifFields,
    BifFloat,
    BifFound,
    BifGraph,
    BifHours,
    BifInt,
    BifIntH,
    BifKds,
    BifLen,
    BifMinutes,
    BifMonths,
    BifMSeconds,
    BifNullInd,
    BifOccur,
    BifOpen,
    BifPAddr,
    BifParms,
    BifReAlloc,
    BifRem,
    BifReplace,
    BifScan,
    BifScanR,
    BifSeconds,
    BifShtDn,
    BifSize,
    BifSqrt,
    BifStatus,
    BifStr,
    BifSubArr,
    BifSubst,
    BifThis,
    BifTime,
    BifTimestamp,
    BifTrim,
    BifTrimL,
    BifTrimR,
    BifUcs2,
    BifUns,
    BifUnsH,
    BifXFoot,
    BifXLate,
    BifYears,

    // Special eval option tokens (single-char keyword args)
    LitH, // 'H' inside eval(...)
    LitT, // 'T'
    LitE, // 'E'
    LitN, // 'N'
    LitA, // 'A'
    LitD, // 'D'

    // Operators
    OpStar2, // **
    OpStar,  // *
    OpPlus,
    OpMinus,
    OpSlash,
    OpEq,
    OpNe, // <>
    OpLe, // <=
    OpGe, // >=
    OpLt,
    OpGt,

    // Punctuation
    LParen,
    RParen,
    Semicolon,
    Colon,
    Dot,

    // Primitives
    Identifier(String),
    StringLit(String),
    IntLit(i64),
    FloatLit(f64),
    HexLit(String),

    Eof,
}

// ─────────────────────────────────────────────────────────────────────────────
// Tokenizer
// ─────────────────────────────────────────────────────────────────────────────

// NOTE(review): the flattened text this section was recovered from had lost its
// generic arguments (`Result, LowerError>`, `Vec = ...`). They were reconstructed
// from the values each function returns — confirm against the real source.

/// Split `source` into a flat token stream terminated by [`Token::Eof`].
///
/// Whitespace, `// ...` line comments and `/* ... */` block comments are
/// dropped. Characters that start no known token are skipped silently.
///
/// # Errors
///
/// Returns a `LowerError` when an integer literal does not fit in an `i64`.
fn tokenize(source: &str) -> Result<Vec<Token>, LowerError> {
    let chars: Vec<char> = source.chars().collect();
    let mut pos = 0;
    let mut tokens = Vec::new();

    // Characters allowed inside a name (after the first character).
    let is_ident_char = |c: char| c.is_alphanumeric() || matches!(c, '_' | '@' | '#' | '$');

    while pos < chars.len() {
        // Skip whitespace
        if chars[pos].is_whitespace() {
            pos += 1;
            continue;
        }

        // Line comments: // ...
        if pos + 1 < chars.len() && chars[pos] == '/' && chars[pos + 1] == '/' {
            while pos < chars.len() && chars[pos] != '\n' {
                pos += 1;
            }
            continue;
        }

        // Block comments: /* ... */
        if pos + 1 < chars.len() && chars[pos] == '/' && chars[pos + 1] == '*' {
            pos += 2;
            let mut closed = false;
            while pos + 1 < chars.len() {
                if chars[pos] == '*' && chars[pos + 1] == '/' {
                    pos += 2;
                    closed = true;
                    break;
                }
                pos += 1;
            }
            if !closed {
                // Unterminated comment: the scan above stops one character
                // short of the end, so without this the final character of
                // the input would be lexed as a token.
                pos = chars.len();
            }
            continue;
        }

        // String literal
        if chars[pos] == '\'' {
            pos += 1;
            let mut s = String::new();
            while pos < chars.len() {
                if chars[pos] == '\'' {
                    // '' means a literal single-quote inside the string
                    if pos + 1 < chars.len() && chars[pos + 1] == '\'' {
                        s.push('\'');
                        pos += 2;
                    } else {
                        pos += 1;
                        break;
                    }
                } else {
                    s.push(chars[pos]);
                    pos += 1;
                }
            }
            tokens.push(Token::StringLit(s));
            continue;
        }

        // Hex literal X'...'
        if chars[pos] == 'X' || chars[pos] == 'x' {
            if pos + 1 < chars.len() && chars[pos + 1] == '\'' {
                pos += 2;
                let mut h = String::new();
                while pos < chars.len() && chars[pos] != '\'' {
                    h.push(chars[pos]);
                    pos += 1;
                }
                if pos < chars.len() {
                    pos += 1;
                }
                tokens.push(Token::HexLit(h));
                continue;
            }
        }

        // Numeric literal
        if chars[pos].is_ascii_digit()
            || (chars[pos] == '.' && pos + 1 < chars.len() && chars[pos + 1].is_ascii_digit())
        {
            let start = pos;
            while pos < chars.len() && chars[pos].is_ascii_digit() {
                pos += 1;
            }
            if pos < chars.len() && chars[pos] == '.' {
                pos += 1;
                while pos < chars.len() && chars[pos].is_ascii_digit() {
                    pos += 1;
                }
                let s: String = chars[start..pos].iter().collect();
                let f: f64 = s.parse().unwrap_or(0.0);
                tokens.push(Token::FloatLit(f));
            } else {
                let s: String = chars[start..pos].iter().collect();
                // An all-digit string only fails to parse on overflow; report
                // it instead of silently substituting 0.
                let n: i64 = s.parse::<i64>().map_err(|_| {
                    LowerError::new(format!("integer literal out of range: {}", s))
                })?;
                tokens.push(Token::IntLit(n));
            }
            continue;
        }

        // Operators and punctuation
        match chars[pos] {
            '(' => { tokens.push(Token::LParen); pos += 1; continue; }
            ')' => { tokens.push(Token::RParen); pos += 1; continue; }
            ';' => { tokens.push(Token::Semicolon); pos += 1; continue; }
            ':' => { tokens.push(Token::Colon); pos += 1; continue; }
            '.' => { tokens.push(Token::Dot); pos += 1; continue; }
            '+' => { tokens.push(Token::OpPlus); pos += 1; continue; }
            '-' => { tokens.push(Token::OpMinus); pos += 1; continue; }
            '/' => { tokens.push(Token::OpSlash); pos += 1; continue; }
            '=' => { tokens.push(Token::OpEq); pos += 1; continue; }
            '*' => {
                if pos + 1 < chars.len() && chars[pos + 1] == '*' {
                    tokens.push(Token::OpStar2);
                    pos += 2;
                } else if pos + 1 < chars.len()
                    && (chars[pos + 1].is_alphabetic() || chars[pos + 1] == '_')
                {
                    // Could be start of *ON, *OFF, *BLANK etc. — or a
                    // multiplication written without a space (`a*b`).
                    let start = pos + 1; // skip *
                    let mut end = start;
                    while end < chars.len() && (chars[end].is_alphanumeric() || chars[end] == '_') {
                        end += 1;
                    }
                    let word: String = chars[start..end].iter().collect();
                    let upper = word.to_uppercase();
                    let tok = match upper.as_str() {
                        "ON" => Token::KwOn,
                        "OFF" => Token::KwOff,
                        "BLANK" => Token::KwBlank,
                        "BLANKS" => Token::KwBlanks,
                        "ZERO" => Token::KwZero,
                        "ZEROS" => Token::KwZeros,
                        "HIVAL" => Token::KwHiVal,
                        "LOVAL" => Token::KwLoVal,
                        "NULL" => Token::KwNull,
                        "YES" => Token::KwYes,
                        "NO" => Token::KwNo,
                        "ALL" => Token::KwAll,
                        "START" => Token::KwStart,
                        "END" => Token::KwEnd,
                        "PROGRAM" => Token::KwProgram,
                        "FILE" => Token::KwFile,
                        "OMIT" => Token::KwOmit,
                        "THIS" => Token::KwThis,
                        "SAME" => Token::KwSame,
                        "IN" => Token::KwIn2,
                        "CALLER" => Token::KwCaller,
                        "NEW" => Token::KwNew,
                        "NOPASS" => Token::KwNoPass,
                        "VARSIZE" => Token::KwVarSize,
                        "USRCTL" => Token::KwUsrCtl,
                        "INPUTONLY" => Token::KwInputOnly,
                        "INPUT" => Token::KwInputOpt,
                        "OUTPUT" => Token::KwOutputOpt,
                        "UPDATE" => Token::KwUpdateOpt,
                        "DELETE" => Token::KwDeleteOpt,
                        "KEY" => Token::KwKey,
                        "SRCSTMT" => Token::KwSrcStmt,
                        "NODEBUGIO" => Token::KwNodeBugIo,
                        "NOUNREF" => Token::KwNoUnRef,
                        "NOSHOWCPY" => Token::KwNoShowCpy,
                        "RESDECPOS" => Token::KwResDecPos,
                        "SNGLVL" => Token::KwSnglLvl,
                        "TERASPACE" => Token::KwTeraSpace,
                        "INHERIT" => Token::KwInherit,
                        "EXTDFT" => Token::KwExtDft,
                        "CL" => Token::KwCl,
                        "DCLCASE" => Token::KwDclCase,
                        "YEARS" => Token::KwYears,
                        "MONTHS" => Token::KwMonths,
                        "DAYS" => Token::KwDays,
                        "HOURS" => Token::KwHours,
                        "MINUTES" => Token::KwMinutes,
                        "SECONDS" => Token::KwSeconds,
                        "MSECONDS" => Token::KwMSeconds,
                        "N" => Token::LitN,
                        _ => Token::Identifier(format!("*{}", word)),
                    };
                    // An unrecognised *word directly after an operand is a
                    // multiplication (`a*b`, `(x) *y`), not a special value.
                    let after_operand = matches!(
                        tokens.last(),
                        Some(Token::Identifier(_))
                            | Some(Token::IntLit(_))
                            | Some(Token::FloatLit(_))
                            | Some(Token::StringLit(_))
                            | Some(Token::HexLit(_))
                            | Some(Token::RParen)
                    );
                    if after_operand && matches!(tok, Token::Identifier(_)) {
                        // Emit only the operator; the word is lexed on the
                        // next iteration as an ordinary identifier/keyword.
                        tokens.push(Token::OpStar);
                        pos += 1;
                    } else {
                        tokens.push(tok);
                        pos = end;
                    }
                } else {
                    tokens.push(Token::OpStar);
                    pos += 1;
                }
                continue;
            }
            '<' => {
                if pos + 1 < chars.len() && chars[pos + 1] == '>' {
                    tokens.push(Token::OpNe);
                    pos += 2;
                } else if pos + 1 < chars.len() && chars[pos + 1] == '=' {
                    tokens.push(Token::OpLe);
                    pos += 2;
                } else {
                    tokens.push(Token::OpLt);
                    pos += 1;
                }
                continue;
            }
            '>' => {
                if pos + 1 < chars.len() && chars[pos + 1] == '=' {
                    tokens.push(Token::OpGe);
                    pos += 2;
                } else {
                    tokens.push(Token::OpGt);
                    pos += 1;
                }
                continue;
            }
            '%' => {
                // Built-in function
                pos += 1;
                let start = pos;
                while pos < chars.len() && (chars[pos].is_alphanumeric() || chars[pos] == '_') {
                    pos += 1;
                }
                let name: String = chars[start..pos].iter().collect();
                let upper = name.to_uppercase();
                let tok = match upper.as_str() {
                    "ABS" => Token::BifAbs,
                    "ADDR" => Token::BifAddr,
                    "ALLOC" => Token::BifAlloc,
                    "BITAND" => Token::BifBitAnd,
                    "BITNOT" => Token::BifBitNot,
                    "BITOR" => Token::BifBitOr,
                    "BITXOR" => Token::BifBitXor,
                    "CHAR" => Token::BifChar,
                    "CHECK" => Token::BifCheck,
                    "CHECKR" => Token::BifCheckR,
                    "DATE" => Token::BifDate,
                    "DAYS" => Token::BifDays,
                    "DEC" => Token::BifDec,
                    "DECH" => Token::BifDecH,
                    "DECPOS" => Token::BifDecPos,
                    "DIFF" => Token::BifDiff,
                    "DIV" => Token::BifDiv,
                    "EDITC" => Token::BifEditC,
                    "EDITFLT" => Token::BifEditFlt,
                    "EDITW" => Token::BifEditW,
                    "ELEM" => Token::BifElem,
                    "EOF" => Token::BifEof,
                    "EQUAL" => Token::BifEqual,
                    "ERROR" => Token::BifError,
                    "FIELDS" => Token::BifFields,
                    "FLOAT" => Token::BifFloat,
                    "FOUND" => Token::BifFound,
                    "GRAPH" => Token::BifGraph,
                    "HOURS" => Token::BifHours,
                    "INT" => Token::BifInt,
                    "INTH" => Token::BifIntH,
                    "KDS" => Token::BifKds,
                    "LEN" => Token::BifLen,
                    "MINUTES" => Token::BifMinutes,
                    "MONTHS" => Token::BifMonths,
                    "MSECONDS" => Token::BifMSeconds,
                    "NULLIND" => Token::BifNullInd,
                    "OCCUR" => Token::BifOccur,
                    "OPEN" => Token::BifOpen,
                    "PADDR" => Token::BifPAddr,
                    "PARMS" => Token::BifParms,
                    "REALLOC" => Token::BifReAlloc,
                    "REM" => Token::BifRem,
                    "REPLACE" => Token::BifReplace,
                    "SCAN" => Token::BifScan,
                    "SCANR" => Token::BifScanR,
                    "SECONDS" => Token::BifSeconds,
                    "SHTDN" => Token::BifShtDn,
                    "SIZE" => Token::BifSize,
                    "SQRT" => Token::BifSqrt,
                    "STATUS" => Token::BifStatus,
                    "STR" => Token::BifStr,
                    "SUBARR" => Token::BifSubArr,
                    "SUBST" => Token::BifSubst,
                    "THIS" => Token::BifThis,
                    "TIME" => Token::BifTime,
                    "TIMESTAMP" => Token::BifTimestamp,
                    "TRIM" => Token::BifTrim,
                    "TRIML" => Token::BifTrimL,
                    "TRIMR" => Token::BifTrimR,
                    "UCS2" => Token::BifUcs2,
                    "UNS" => Token::BifUns,
                    "UNSH" => Token::BifUnsH,
                    "XFOOT" => Token::BifXFoot,
                    "XLATE" => Token::BifXLate,
                    "YEARS" => Token::BifYears,
                    _ => Token::Identifier(format!("%{}", name)),
                };
                tokens.push(tok);
                continue;
            }
            _ => {}
        }

        // Identifier / keyword — may include hyphens (DCL-S, CTL-OPT, etc.)
        if chars[pos].is_alphabetic() || matches!(chars[pos], '_' | '@' | '#' | '$') {
            let start = pos;
            while pos < chars.len() && is_ident_char(chars[pos]) {
                pos += 1;
            }
            // A hyphen is glued onto the word only when the word so far is one
            // of the prefixes used by the compound keywords in
            // `keyword_or_ident` (CTL-OPT, DCL-S, END-DS, BEG-SR, ON-ERROR,
            // EVAL-CORR, ...). Anywhere else `a-b` is an arithmetic minus and
            // must be left for the operator branch above.
            let prefix = chars[start..pos].iter().collect::<String>().to_uppercase();
            if matches!(prefix.as_str(), "CTL" | "DCL" | "END" | "BEG" | "ON" | "EVAL") {
                while pos + 1 < chars.len() && chars[pos] == '-' && chars[pos + 1].is_alphabetic() {
                    pos += 1; // include hyphen
                    while pos < chars.len() && is_ident_char(chars[pos]) {
                        pos += 1;
                    }
                }
            }
            let word: String = chars[start..pos].iter().collect();
            let upper = word.to_uppercase();
            let tok = keyword_or_ident(&upper, &word);
            tokens.push(tok);
            continue;
        }

        // Unknown character — skip
        pos += 1;
    }

    tokens.push(Token::Eof);
    Ok(tokens)
}

/// Classify a word (already upper-cased) as a keyword token or identifier.
///
/// `upper` is matched against the keyword table; when nothing matches the
/// result is `Token::Identifier` carrying `original` (the caller's spelling).
fn keyword_or_ident(upper: &str, original: &str) -> Token {
    match upper {
        // Compound declaration keywords
        "CTL-OPT" => Token::KwCtlOpt,
        "DCL-S" => Token::KwDclS,
        "DCL-C" => Token::KwDclC,
        "DCL-DS" => Token::KwDclDs,
        "END-DS" => Token::KwEndDs,
        "DCL-F" => Token::KwDclF,
        "DCL-PROC" => Token::KwDclProc,
        "END-PROC" => Token::KwEndProc,
        "DCL-PI" => Token::KwDclPi,
        "END-PI" => Token::KwEndPi,
        "BEG-SR" => Token::KwBegSr,
        "END-SR" => Token::KwEndSr,
        "ON-ERROR" => Token::KwOnError,
        "EVAL-CORR" => Token::KwEvalCorr,

        // Type keywords
        "CHAR" => Token::KwChar,
        "VARCHAR" => Token::KwVarChar,
        "GRAPH" => Token::KwGraph,
        "VARGRAPH" => Token::KwVarGraph,
        "UCS2" => Token::KwUcs2,
        "VARUCS2" => Token::KwVarUcs2,
        "INT" => Token::KwInt,
        "UNS" => Token::KwUns,
        "FLOAT" => Token::KwFloat,
        "PACKED" => Token::KwPacked,
        "ZONED" => Token::KwZoned,
        "BINDEC" => Token::KwBindec,
        "IND" => Token::KwInd,
        "DATE" => Token::KwDate,
        "TIME" => Token::KwTime,
        "TIMESTAMP" => Token::KwTimestamp,
        "POINTER" => Token::KwPointer,
        "PROCPTR" => Token::KwProcPtr,
        "LIKE" => Token::KwLike,
        "LIKEDS" => Token::KwLikeDs,
        "LIKEREC" => Token::KwLikeRec,
        "OBJECT" => Token::KwObject,

        // Variable / struct / param keywords
        "INZ" => Token::KwInz,
        "CONST" => Token::KwConst,
        "VALUE" => Token::KwValue,
        "STATIC" => Token::KwStatic,
        "BASED" => Token::KwBased,
        "DIM" => Token::KwDim,
        "ASCEND" => Token::KwAscend,
        "DESCEND" => Token::KwDescend,
        "ALTSEQ" => Token::KwAltSeq,
        "OPDESC" => Token::KwOpDesc,
        "NOOPT" => Token::KwNoOpt,
        "VOLATILE" => Token::KwVolatile,
        "OPTIONS" => Token::KwOptions,
        "QUALIFIED" => Token::KwQualified,
        "TEMPLATE" => Token::KwTemplate,
        "EXTNAME" => Token::KwExtName,

        // Proc keywords
        "EXPORT" => Token::KwExport,
        "EXTPROC" => Token::KwExtProc,

        // Control option keywords
        "NOMAIN" => Token::KwNoMain,
        "MAIN" => Token::KwMain,
        "DFTACTGRP" => Token::KwDftActGrp,
        "ACTGRP" => Token::KwActGrp,
        "OPTION" => Token::KwOption,
        "DATFMT" => Token::KwDatFmt,
        "TIMFMT" => Token::KwTimFmt,
        "DECEDIT" => Token::KwDecEdit,
        "ALWNULL" => Token::KwAlwNull,
        "DEBUG" => Token::KwDebug,
        "EXPROPTS" => Token::KwExpOpts,
        "BNDDIR" => Token::KwBndDir,
        "COPYRIGHT" => Token::KwCopyright,
        "STGMDL" => Token::KwStgMdl,
        "TRUNCNBR" => Token::KwTruncNbr,
        "TEXT" => Token::KwText,

        // Statement keywords
        "EVAL" => Token::KwEval,
        "EVALR" => Token::KwEvalR,
        "IF" => Token::KwIf,
        "ELSEIF" => Token::KwElseIf,
        "ELSE" => Token::KwElse,
        "ENDIF" => Token::KwEndIf,
        "DOW" => Token::KwDow,
        "DOU" => Token::KwDou,
        "ENDDO" => Token::KwEndDo,
        "FOR" => Token::KwFor,
        "TO" => Token::KwTo,
        "DOWNTO" => Token::KwDownTo,
        "BY" => Token::KwBy,
        "ENDFOR" => Token::KwEndFor,
        "SELECT" => Token::KwSelect,
        "WHEN" => Token::KwWhen,
        "OTHER" => Token::KwOther,
        "ENDSL" => Token::KwEndSl,
        "MONITOR" => Token::KwMonitor,
        "ENDMON" => Token::KwEndMon,
        "RETURN" => Token::KwReturn,
        "LEAVE" => Token::KwLeave,
        "ITER" => Token::KwIter,
        "LEAVESR" => Token::KwLeaveSr,
        "EXSR" => Token::KwExSr,
        "CALLP" => Token::KwCallP,
        "DSPLY" => Token::KwDsply,
        "RESET" => Token::KwReset,
        "CLEAR" => Token::KwClear,
        "SORTA" => Token::KwSortA,
        "DUMP" => Token::KwDump,
        "FORCE" => Token::KwForce,
        "POST" => Token::KwPost,
        "FEOD" => Token::KwFeod,
        "UNLOCK" => Token::KwUnlock,
        "DEALLOC" => Token::KwDeAlloc,

        // I/O
        "READ" => Token::KwRead,
        "READP" => Token::KwReadP,
        "READE" => Token::KwReadE,
        "READPE" => Token::KwReadPE,
        "WRITE" => Token::KwWrite,
        "UPDATE" => Token::KwUpdate,
        "DELETE" => Token::KwDelete,
        "CHAIN" => Token::KwChain,
        "SETLL" => Token::KwSetLL,
        "SETGT" => Token::KwSetGT,
        "OPEN" => Token::KwOpen,
        "CLOSE" => Token::KwClose,
        "EXCEPT" => Token::KwExcept,
        "EXFMT" => Token::KwExFmt,
        "COMMIT" => Token::KwCommit,
        "ROLLBACK" => Token::KwRollback,

        // Logical operators
        "AND" => Token::KwAnd,
        "OR" => Token::KwOr,
        "NOT" => Token::KwNot,

        // Date/time formats (bare keyword versions)
        "MDY" => Token::KwMdy,
        "DMY" => Token::KwDmy,
        "YMD" => Token::KwYmd,
        "JUL" => Token::KwJul,
        "ISO" => Token::KwIso,
        "USA" => Token::KwUsa,
        "EUR" => Token::KwEur,
        "JIS" => Token::KwJis,
        "HMS" => Token::KwHms,

        _ => Token::Identifier(original.to_string()),
    }
}

// ─────────────────────────────────────────────────────────────────────────────
// Parser
// ─────────────────────────────────────────────────────────────────────────────

/// Recursive-descent parser over the token stream produced by `tokenize`.
struct Parser {
    /// Tokens to parse.
    tokens: Vec<Token>,
    /// Index of the next token to be consumed.
    pos: usize,
}

impl Parser {
    /// Create a parser positioned at the first token.
    fn new(tokens: Vec<Token>) -> Self {
        Parser { tokens, pos: 0 }
    }

    /// Current token without consuming it; `Eof` once past the end.
    fn peek(&self) -> &Token {
        self.tokens.get(self.pos).unwrap_or(&Token::Eof)
    }

    /// Token after the current one; `Eof` once past the end.
    fn peek2(&self) -> &Token {
        self.tokens.get(self.pos + 1).unwrap_or(&Token::Eof)
    }

    /// Return the current token and step past it. `Eof` is never stepped over.
    fn advance(&mut self) -> Token {
        let tok = self.tokens.get(self.pos).cloned().unwrap_or(Token::Eof);
        if tok != Token::Eof {
            self.pos += 1;
        }
        tok
    }

    /// Consume one token and fail unless it equals `expected`.
    fn expect(&mut self, expected: &Token) -> Result<(), LowerError> {
        let tok = self.advance();
        if &tok == expected {
            Ok(())
        } else {
            Err(LowerError::new(format!("expected {:?}, got {:?}", expected, tok)))
        }
    }

    /// Consume the current token only if it equals `expected`; report whether it did.
    fn eat(&mut self, expected: &Token) -> bool {
        if self.peek() == expected {
            self.advance();
            true
        } else {
            false
        }
    }

    /// Consume a `;` if one is present (a missing one is tolerated).
    fn eat_semicolon(&mut self) {
        self.eat(&Token::Semicolon);
    }

    /// True when the current token is `Eof`.
    fn is_eof(&self) -> bool {
        matches!(self.peek(), Token::Eof)
    }

    // ── Top-level ──────────────────────────────────────────────────────────

    /// Parse the whole token stream into a `Program`.
    ///
    /// Declarations and procedures that fail to parse are dropped, and
    /// unrecognised top-level tokens are skipped, so this never returns `Err`.
    fn parse_program(&mut self) -> Result<Program, LowerError> {
        let mut declarations = Vec::new();
        let mut procedures = Vec::new();

        while !self.is_eof() {
            match self.peek() {
                Token::KwDclProc => {
                    if let Ok(p) = self.parse_procedure() {
                        procedures.push(p);
                    }
                }
                Token::KwCtlOpt |
                Token::KwDclS |
                Token::KwDclC |
                Token::KwDclDs |
                Token::KwDclF |
                Token::KwBegSr => {
                    if let Ok(d) = self.parse_declaration() {
                        declarations.push(d);
                    }
                }
                _ => {
                    // Skip unrecognised top-level tokens
                    self.advance();
                }
            }
        }

        Ok(Program { declarations, procedures })
    }

    // ── Declarations ───────────────────────────────────────────────────────

    /// Dispatch on the current token to the matching declaration parser.
    fn parse_declaration(&mut self) -> Result<Declaration, LowerError> {
        match self.peek().clone() {
            Token::KwCtlOpt => self.parse_ctl_opt(),
            Token::KwDclS => self.parse_dcl_s(),
            Token::KwDclC => self.parse_dcl_c(),
            Token::KwDclDs => self.parse_dcl_ds(),
            Token::KwDclF => self.parse_dcl_f(),
            Token::KwBegSr => self.parse_subroutine(),
            tok => Err(LowerError::new(format!("unexpected token in declaration: {:?}", tok))),
        }
    }

    /// Parse `CTL-OPT keyword...;`.
    fn parse_ctl_opt(&mut self) -> Result<Declaration, LowerError> {
        self.advance(); // KwCtlOpt
        let mut keywords = Vec::new();
        while !matches!(self.peek(), Token::Semicolon | Token::Eof) {
            let kw = self.parse_ctl_keyword();
            keywords.push(kw);
        }
        self.eat_semicolon();
        Ok(Declaration::ControlSpec(ControlSpec { keywords }))
    }

    /// Parse one control keyword; unknown ones are kept as `CtlKeyword::Other`
    /// (the token's `Debug` text) with any parenthesised argument skipped.
    fn parse_ctl_keyword(&mut self) -> CtlKeyword {
        match self.peek().clone() {
            Token::KwDftActGrp => {
                self.advance();
                let val = self.parse_star_bool_arg();
                CtlKeyword::DftActGrp(val)
            }
            Token::KwNoMain => {
                self.advance();
                CtlKeyword::NoMain
            }
            Token::KwMain => {
                self.advance();
                let name = self.parse_paren_ident().unwrap_or_default();
                CtlKeyword::Main(name)
            }
            tok => {
                // Consume the keyword and any parenthesised argument
                let s = format!("{:?}", tok);
                self.advance();
                if self.peek() == &Token::LParen {
                    self.skip_paren_group();
                }
                CtlKeyword::Other(s)
            }
        }
    }

    /// Parse `(*YES)` or `(*NO)` returning a boolean.
    ///
    /// With no parenthesis at all the result is `true`; any other argument
    /// is consumed and yields `false`.
    fn parse_star_bool_arg(&mut self) -> bool {
        if self.peek() != &Token::LParen { return true; }
        self.advance(); // (
        let result = match self.peek() {
            Token::KwYes => { self.advance(); true }
            Token::KwNo => { self.advance(); false }
            _ => { self.advance(); false }
        };
        self.eat(&Token::RParen);
        result
    }

    /// Parse `(identifier)`. Returns `None` when there is no `(` or the
    /// content is not an identifier token.
    fn parse_paren_ident(&mut self) -> Option<String> {
        if self.peek() != &Token::LParen { return None; }
        self.advance(); // (
        let name = if let Token::Identifier(s) = self.peek().clone() {
            self.advance();
            Some(s)
        } else {
            None
        };
        self.eat(&Token::RParen);
        name
    }

    /// Parse `DCL-S name type keyword...;`.
    fn parse_dcl_s(&mut self) -> Result<Declaration, LowerError> {
        self.advance(); // KwDclS
        let name = self.expect_name()?;
        let ty = self.parse_type_spec()?;
        let mut keywords = Vec::new();
        while !matches!(self.peek(), Token::Semicolon | Token::Eof) {
            keywords.push(self.parse_var_keyword());
        }
        self.eat_semicolon();
        Ok(Declaration::Standalone(StandaloneDecl { name, ty, keywords }))
    }

    /// Parse a `DCL-C` constant declaration.
    fn parse_dcl_c(&mut self) -> Result<Declaration, LowerError> {
        self.advance(); // KwDclC
        let name = self.expect_name()?;

        // `DCL-C name CONST(literal)` or `DCL-C name literal` or `DCL-C name *named`
        match self.peek().clone() {
            Token::KwConst => {
                self.advance(); // CONST
                self.expect(&Token::LParen)?;
                let lit = self.parse_literal()?;
                self.expect(&Token::RParen)?;
                self.eat_semicolon();
                Ok(Declaration::Constant(ConstantDecl { name, value: lit }))
            }
            Token::KwOn | Token::KwOff | Token::KwBlank | Token::KwBlanks |
            Token::KwZero | Token::KwZeros | Token::KwHiVal | Token::KwLoVal |
            Token::KwNull => {
                let nc = self.parse_named_constant()?;
                self.eat_semicolon();
                Ok(Declaration::NamedConstantDecl(NamedConstantDecl { name, value: nc }))
            }
            _ => {
                let lit = self.parse_literal()?;
                self.eat_semicolon();
                Ok(Declaration::Constant(ConstantDecl { name, value: lit }))
            }
        }
    }

    /// Parse `DCL-DS name keyword...; field... END-DS;`.
    ///
    /// A field that fails to parse is dropped and one token is skipped
    /// before trying again.
    fn parse_dcl_ds(&mut self) -> Result<Declaration, LowerError> {
        self.advance(); // KwDclDs
        let name = self.expect_name()?;
        let mut keywords = Vec::new();
        // Parse DS keywords (before ';')
        while !matches!(self.peek(), Token::Semicolon | Token::Eof) {
            match self.peek().clone() {
                Token::KwQualified => { self.advance(); keywords.push(DsKeyword::Qualified); }
                Token::KwTemplate => { self.advance(); keywords.push(DsKeyword::Template); }
                tok => {
                    let s = format!("{:?}", tok);
                    self.advance();
                    if self.peek() == &Token::LParen { self.skip_paren_group(); }
                    keywords.push(DsKeyword::Other(s));
                }
            }
        }
        self.eat_semicolon();
        let mut fields = Vec::new();
        // Parse fields until END-DS
        while !matches!(self.peek(), Token::KwEndDs | Token::Eof) {
            if let Ok(field) = self.parse_ds_field() {
                fields.push(field);
            } else {
                self.advance();
            }
        }
        self.eat(&Token::KwEndDs);
        self.eat_semicolon();
        Ok(Declaration::DataStructure(DataStructureDecl { name, keywords, fields }))
    }

    /// Parse one data-structure subfield: `name type keyword...;`.
    fn parse_ds_field(&mut self) -> Result<DsField, LowerError> {
        let name = self.expect_name()?;
        let ty = self.parse_type_spec()?;
        let mut keywords = Vec::new();
        while !matches!(self.peek(), Token::Semicolon | Token::Eof) {
            keywords.push(self.parse_var_keyword());
        }
        self.eat_semicolon();
        Ok(DsField { name, ty, keywords })
    }

    /// Parse `DCL-F name keyword...;`, keeping each keyword as the token's
    /// `Debug` text and skipping parenthesised arguments.
    fn parse_dcl_f(&mut self) -> Result<Declaration, LowerError> {
        self.advance(); // KwDclF
        let name = self.expect_name()?;
        let mut keywords = Vec::new();
        while !matches!(self.peek(), Token::Semicolon | Token::Eof) {
            let s = format!("{:?}", self.peek().clone());
            self.advance();
            if self.peek() == &Token::LParen { self.skip_paren_group(); }
            keywords.push(s);
        }
        self.eat_semicolon();
        Ok(Declaration::File(FileDecl { name, keywords }))
    }

    /// Parse `BEG-SR name; statement... END-SR;`.
    fn parse_subroutine(&mut self) -> Result<Declaration, LowerError> {
        self.advance(); // KwBegSr
        let name = self.expect_name()?;
        self.eat_semicolon();
        let body = self.parse_statement_list(&[Token::KwEndSr]);
        self.eat(&Token::KwEndSr);
        self.eat_semicolon();
        Ok(Declaration::Subroutine(Subroutine { name, body }))
    }

    // ── Type specifiers ────────────────────────────────────────────────────

    /// Parse a type specifier such as `CHAR(10)`, `PACKED(7:2)` or `LIKE(x)`.
    ///
    /// Every string-like keyword other than `CHAR` is mapped to
    /// `TypeSpec::VarChar`. Arguments of `DATE`/`TIME`/`TIMESTAMP` are skipped.
    fn parse_type_spec(&mut self) -> Result<TypeSpec, LowerError> {
        match self.peek().clone() {
            Token::KwChar | Token::KwVarChar | Token::KwGraph | Token::KwVarGraph |
            Token::KwUcs2 | Token::KwVarUcs2 => {
                let is_char = matches!(self.peek(), Token::KwChar);
                self.advance();
                self.expect(&Token::LParen)?;
                let expr = self.parse_expression()?;
                self.expect(&Token::RParen)?;
                if is_char {
                    Ok(TypeSpec::Char(Box::new(expr)))
                } else {
                    Ok(TypeSpec::VarChar(Box::new(expr)))
                }
            }
            Token::KwInt => {
                self.advance();
                self.expect(&Token::LParen)?;
                let expr = self.parse_expression()?;
                self.expect(&Token::RParen)?;
                Ok(TypeSpec::Int(Box::new(expr)))
            }
            Token::KwUns => {
                self.advance();
                self.expect(&Token::LParen)?;
                let expr = self.parse_expression()?;
                self.expect(&Token::RParen)?;
                Ok(TypeSpec::Uns(Box::new(expr)))
            }
            Token::KwFloat => {
                self.advance();
                self.expect(&Token::LParen)?;
                let expr = self.parse_expression()?;
                self.expect(&Token::RParen)?;
                Ok(TypeSpec::Float(Box::new(expr)))
            }
            Token::KwPacked => {
                self.advance();
                self.expect(&Token::LParen)?;
                let digits = self.parse_expression()?;
                self.expect(&Token::Colon)?;
                let decimals = self.parse_expression()?;
                self.expect(&Token::RParen)?;
                Ok(TypeSpec::Packed(Box::new(digits), Box::new(decimals)))
            }
            Token::KwZoned => {
                self.advance();
                self.expect(&Token::LParen)?;
                let digits = self.parse_expression()?;
                self.expect(&Token::Colon)?;
                let decimals = self.parse_expression()?;
                self.expect(&Token::RParen)?;
                Ok(TypeSpec::Zoned(Box::new(digits), Box::new(decimals)))
            }
            Token::KwBindec => {
                self.advance();
                self.expect(&Token::LParen)?;
                let digits = self.parse_expression()?;
                self.expect(&Token::Colon)?;
                let decimals = self.parse_expression()?;
                self.expect(&Token::RParen)?;
                Ok(TypeSpec::Bindec(Box::new(digits), Box::new(decimals)))
            }
            Token::KwInd => {
                self.advance();
                Ok(TypeSpec::Ind)
            }
            Token::KwDate => {
                self.advance();
                if self.peek() == &Token::LParen {
                    self.skip_paren_group();
                }
                Ok(TypeSpec::Date)
            }
            Token::KwTime => {
                self.advance();
                if self.peek() == &Token::LParen {
                    self.skip_paren_group();
                }
                Ok(TypeSpec::Time)
            }
            Token::KwTimestamp => {
                self.advance();
                if self.peek() == &Token::LParen {
                    self.skip_paren_group();
                }
                Ok(TypeSpec::Timestamp)
            }
            Token::KwPointer => {
                self.advance();
                Ok(TypeSpec::Pointer)
            }
            Token::KwLike => {
                self.advance();
                let name = self.parse_paren_ident().unwrap_or_default();
                Ok(TypeSpec::Like(name))
            }
            Token::KwLikeDs => {
                self.advance();
                let name = self.parse_paren_ident().unwrap_or_default();
                Ok(TypeSpec::LikeDs(name))
            }
            tok => {
                Err(LowerError::new(format!("expected type spec, got {:?}", tok)))
            }
        }
    }

    // ── Variable keyword ──────────────────────────────────────────────────

    /// Parse one keyword following a variable's type (`INZ`, `STATIC`, ...).
    ///
    /// Unknown keywords become `VarKeyword::Other` (the token's `Debug`
    /// text) with any parenthesised argument skipped.
    fn parse_var_keyword(&mut self) -> VarKeyword {
        match self.peek().clone() {
            Token::KwInz => {
                self.advance();
                if self.peek() == &Token::LParen {
                    self.advance(); // (
                    // Check for named constant
                    match self.peek().clone() {
                        Token::KwOn | Token::KwOff | Token::KwBlank | Token::KwBlanks |
                        Token::KwZero | Token::KwZeros | Token::KwHiVal | Token::KwLoVal |
                        Token::KwNull => {
                            if let Ok(nc) = self.parse_named_constant() {
                                self.eat(&Token::RParen);
                                return VarKeyword::InzNamed(nc);
                            }
                        }
                        Token::KwExtDft => {
                            self.advance();
                            self.eat(&Token::RParen);
                            return VarKeyword::Other("INZ(*EXTDFT)".to_string());
                        }
                        _ => {}
                    }
                    if let Ok(expr) = self.parse_expression() {
                        self.eat(&Token::RParen);
                        return VarKeyword::InzExpr(expr);
                    }
                    self.eat(&Token::RParen);
                    VarKeyword::Inz
                } else {
                    VarKeyword::Inz
                }
            }
            Token::KwStatic => {
                self.advance();
                VarKeyword::Static
            }
            tok => {
                let s = format!("{:?}", tok);
                self.advance();
                if self.peek() == &Token::LParen { self.skip_paren_group(); }
                VarKeyword::Other(s)
            }
        }
    }

    // ── Procedure ─────────────────────────────────────────────────────────

    /// Parse `DCL-PROC name [EXPORT ...]; [DCL-PI ...] locals... body... END-PROC;`.
    ///
    /// Keywords other than `EXPORT` are consumed and ignored; local
    /// declarations that fail to parse are dropped.
    fn parse_procedure(&mut self) -> Result<Procedure, LowerError> {
        self.advance(); // KwDclProc
        let name = self.expect_name()?;
        let mut exported = false;
        let mut kw_tokens = Vec::new();
        while !matches!(self.peek(), Token::Semicolon | Token::Eof) {
            match self.peek().clone() {
                Token::KwExport => { self.advance(); exported = true; }
                tok => {
                    kw_tokens.push(format!("{:?}", tok));
                    self.advance();
                    if self.peek() == &Token::LParen { self.skip_paren_group(); }
                }
            }
        }
        self.eat_semicolon();

        // Optional DCL-PI
        let pi = if self.peek() == &Token::KwDclPi {
            Some(self.parse_pi()?)
        } else {
            None
        };

        // Local declarations
        let mut locals = Vec::new();
        loop {
            match self.peek() {
                Token::KwDclS | Token::KwDclC | Token::KwDclDs | Token::KwDclF | Token::KwBegSr => {
                    if let Ok(d) = self.parse_declaration() {
                        locals.push(d);
                    }
                }
                _ => break,
            }
        }

        // Body statements until END-PROC
        let body = self.parse_statement_list(&[Token::KwEndProc]);
        self.eat(&Token::KwEndProc);
        self.eat_semicolon();

        Ok(Procedure { name, exported, pi, locals, body })
    }

    /// Parse `DCL-PI name [return-type]; param... END-PI;`.
    ///
    /// A first token that is not usable as a name is consumed and recorded
    /// as `*N`. Parameters that fail to parse are dropped.
    fn parse_pi(&mut self) -> Result<PiSpec, LowerError> {
        self.advance(); // KwDclPi
        // name or *N
        let name = match self.peek().clone() {
            Token::Identifier(s) => { self.advance(); s }
            _ => {
                // Could be *N, or a keyword used as a name, or a real keyword.
                if let Some(n) = token_as_name(self.peek()) {
                    self.advance();
                    n
                } else {
                    self.advance();
                    "*N".to_string()
                }
            }
        };
        // optional return type before ';'
        let return_ty = if !matches!(self.peek(), Token::Semicolon | Token::Eof) {
            self.parse_type_spec().ok()
        } else {
            None
        };
        self.eat_semicolon();
        let mut params = Vec::new();
        while !matches!(self.peek(), Token::KwEndPi | Token::Eof) {
            if let Ok(p) = self.parse_pi_param() {
                params.push(p);
            } else {
                self.advance();
            }
        }
        self.eat(&Token::KwEndPi);
        self.eat_semicolon();
        Ok(PiSpec { name, return_ty, params })
    }

    /// Parse one procedure-interface parameter: `name type keyword...;`.
    fn parse_pi_param(&mut self) -> Result<PiParam, LowerError> {
        let name = self.expect_name()?;
        let ty = self.parse_type_spec()?;
        let mut keywords = Vec::new();
        while !matches!(self.peek(), Token::Semicolon | Token::Eof) {
            match self.peek().clone() {
                Token::KwValue => { self.advance(); keywords.push(ParamKeyword::Value); }
                Token::KwConst => { self.advance(); keywords.push(ParamKeyword::Const); }
                tok => {
                    let s = format!("{:?}", tok);
                    self.advance();
                    if self.peek() == &Token::LParen { self.skip_paren_group(); }
                    keywords.push(ParamKeyword::Other(s));
                }
            }
        }
        self.eat_semicolon();
        Ok(PiParam { name, ty, keywords })
    }

    // ── Statement list ─────────────────────────────────────────────────────

    /// Parse statements until `Eof`, one of `terminators`, or any
    /// block-closing keyword. The stopping token is not consumed.
    ///
    /// A statement that fails to parse is dropped and input is skipped up
    /// to and including the next `;`.
    fn parse_statement_list(&mut self, terminators: &[Token]) -> Vec<Statement> {
        let mut stmts = Vec::new();
        loop {
            if self.is_eof() { break; }
            if terminators.iter().any(|t| t == self.peek()) { break; }
            // Also stop at certain keywords that signal end-of-block
            if matches!(self.peek(),
                Token::KwElse | Token::KwElseIf | Token::KwEndIf |
                Token::KwEndDo | Token::KwEndFor | Token::KwEndSl |
                Token::KwEndMon | Token::KwWhen | Token::KwOther |
                Token::KwOnError | Token::KwEndSr
            ) {
                break;
            }
            match self.parse_statement() {
                Ok(s) => stmts.push(s),
                Err(_) => { self.recover_to_semicolon(); }
            }
        }
        stmts
    }

    /// Error recovery: skip tokens up to and including the next `;`.
    fn recover_to_semicolon(&mut self) {
        while !matches!(self.peek(), Token::Semicolon | Token::Eof) {
            self.advance();
        }
        self.eat(&Token::Semicolon);
    }

    // ── Individual statements ──────────────────────────────────────────────

    /// Parse a single statement, dispatching on its first token.
    ///
    /// # Errors
    ///
    /// Fails when the current token cannot start a statement.
    fn parse_statement(&mut self) -> Result<Statement, LowerError> {
        match self.peek().clone() {
            Token::KwReturn => self.parse_return(),
            Token::KwLeave => { self.advance(); self.eat_semicolon(); Ok(Statement::Leave) }
            Token::KwIter => { self.advance(); self.eat_semicolon(); Ok(Statement::Iter) }
            Token::KwLeaveSr => { self.advance(); self.eat_semicolon(); Ok(Statement::LeaveSr) }
            Token::KwExSr => self.parse_exsr(),
            Token::KwDsply => self.parse_dsply(),
            Token::KwIf => self.parse_if(),
            Token::KwDow => self.parse_dow(),
            Token::KwDou => self.parse_dou(),
            Token::KwFor => self.parse_for(),
            Token::KwSelect => self.parse_select(),
            Token::KwMonitor => self.parse_monitor(),
            Token::KwEval | Token::KwEvalR | Token::KwEvalCorr => self.parse_assign_eval(),
            Token::KwCallP => self.parse_callp(),
            Token::KwClear => self.parse_clear(),
            Token::KwReset => self.parse_reset(),
            Token::KwDump => {
                self.advance();
                if self.peek() == &Token::LParen { self.skip_paren_group(); }
                self.eat_semicolon();
                Ok(Statement::Unimplemented("DUMP".into()))
            }
            Token::KwSortA => {
                self.advance();
                let _ = self.expect_ident();
                self.eat_semicolon();
                Ok(Statement::Unimplemented("SORTA".into()))
            }
            Token::KwDeAlloc => {
                self.advance();
                if self.peek() == &Token::LParen { self.skip_paren_group(); }
                let _ = self.expect_ident();
                self.eat_semicolon();
                Ok(Statement::Unimplemented("DEALLOC".into()))
            }
            Token::KwForce | Token::KwPost | Token::KwFeod | Token::KwUnlock => {
                let kw = format!("{:?}", self.advance());
                let _ = self.expect_ident();
                self.eat_semicolon();
                Ok(Statement::Unimplemented(kw))
            }

            // I/O statements
            Token::KwRead => self.parse_read(),
            Token::KwReadP => self.parse_readp(),
            Token::KwReadE => self.parse_reade(),
            Token::KwReadPE => self.parse_readpe(),
            Token::KwWrite => self.parse_write(),
            Token::KwUpdate => self.parse_update(),
            Token::KwDelete => self.parse_delete(),
            Token::KwChain => self.parse_chain(),
            Token::KwSetLL => self.parse_setll(),
            Token::KwSetGT => self.parse_setgt(),
            Token::KwOpen => self.parse_open(),
            Token::KwClose => self.parse_close(),
            Token::KwExcept => self.parse_except(),
            Token::KwExFmt => self.parse_exfmt(),
            Token::KwCommit | Token::KwRollback => {
                let kw = format!("{:?}", self.advance());
                self.eat_semicolon();
                Ok(Statement::Unimplemented(kw))
            }

            Token::Identifier(_) => {
                // Could be: implicit CALLP `name(args);` or assignment `name = expr;`
                self.parse_ident_stmt()
            }
            tok => {
                Err(LowerError::new(format!("unexpected statement token: {:?}", tok)))
            }
        }
    }

    /// Parse `RETURN;` or `RETURN expr;`. An expression that fails to parse
    /// is recorded as no value.
    fn parse_return(&mut self) -> Result<Statement, LowerError> {
        self.advance(); // KwReturn
        if self.peek() == &Token::Semicolon {
            self.advance();
            return Ok(Statement::Return(ReturnStmt { value: None }));
        }
        let expr = self.parse_expression().ok();
        self.eat_semicolon();
        Ok(Statement::Return(ReturnStmt { value: expr }))
    }

    /// Parse `EXSR name;`.
    fn parse_exsr(&mut self) -> Result<Statement, LowerError> {
        self.advance(); // KwExSr
        let name = self.expect_name()?;
        self.eat_semicolon();
        Ok(Statement::ExSr(name))
    }

    /// Parse `DSPLY expr;` or `DSPLY (expr : msgq : response);`.
    ///
    /// In the parenthesised form both `msgq` and `response` are optional and
    /// are only recorded when they are plain identifiers; `msgq` may be left
    /// empty (`expr : : response`).
    fn parse_dsply(&mut self) -> Result<Statement, LowerError> {
        self.advance(); // KwDsply
        if self.peek() == &Token::LParen {
            self.advance(); // (
            let expr = self.parse_expression()?;
            let mut msg_q = None;
            let mut response = None;
            if self.eat(&Token::Colon) {
                if let Token::Identifier(s) = self.peek().clone() {
                    self.advance();
                    msg_q = Some(s);
                }
                // The colon separating msgq from response must be left for
                // this check even when msgq is empty; consuming it earlier
                // made `(expr : : response)` lose its response.
                if self.eat(&Token::Colon) {
                    if let Token::Identifier(s) = self.peek().clone() {
                        self.advance();
                        response = Some(s);
                    }
                }
            }
            self.eat(&Token::RParen);
            self.eat_semicolon();
            Ok(Statement::Dsply(DsplyStmt { expr, msg_q, response }))
        } else {
            let expr = self.parse_expression()?;
            self.eat_semicolon();
            Ok(Statement::Dsply(DsplyStmt { expr, msg_q: None, response: None }))
        }
    }

    /// Parse `IF cond; ... [ELSEIF cond; ...]* [ELSE; ...] ENDIF;`.
    fn parse_if(&mut self) -> Result<Statement, LowerError> {
        self.advance(); // KwIf
        let condition = self.parse_expression()?;
        self.eat_semicolon();
        let then_body = self.parse_statement_list(&[
            Token::KwElseIf, Token::KwElse, Token::KwEndIf,
        ]);
        let mut elseifs = Vec::new();
        while self.peek() == &Token::KwElseIf {
            self.advance();
            let cond = self.parse_expression()?;
            self.eat_semicolon();
            let body = self.parse_statement_list(&[
                Token::KwElseIf, Token::KwElse, Token::KwEndIf,
            ]);
            elseifs.push(ElseIf { condition: cond, body });
        }
        let else_body = if self.eat(&Token::KwElse) {
            self.eat_semicolon();
            Some(self.parse_statement_list(&[Token::KwEndIf]))
        } else {
            None
        };
        self.eat(&Token::KwEndIf);
        self.eat_semicolon();
        Ok(Statement::If(IfStmt { condition, then_body, elseifs, else_body }))
    }

    /// Parse `DOW cond; ... ENDDO;`.
    fn parse_dow(&mut self) -> Result<Statement, LowerError> {
        self.advance(); // KwDow
        let condition = self.parse_expression()?;
        self.eat_semicolon();
        let body = self.parse_statement_list(&[Token::KwEndDo]);
        self.eat(&Token::KwEndDo);
        self.eat_semicolon();
        Ok(Statement::DoWhile(DoWhileStmt { condition, body }))
    }

    /// Parse `DOU cond; ... ENDDO;`.
    fn parse_dou(&mut self) -> Result<Statement, LowerError> {
        self.advance(); // KwDou
        let condition = self.parse_expression()?;
        self.eat_semicolon();
        let body = self.parse_statement_list(&[Token::KwEndDo]);
        self.eat(&Token::KwEndDo);
        self.eat_semicolon();
        Ok(Statement::DoUntil(DoUntilStmt { condition, body }))
    }

    /// Parse `FOR var = start [TO|DOWNTO] limit [BY step]; ... ENDFOR;`.
    fn parse_for(&mut self) -> Result<Statement, LowerError> {
        self.advance(); // KwFor
        let var = self.expect_ident()?;
        self.expect(&Token::OpEq)?;
        let start = self.parse_expression()?;
        let downto = if self.eat(&Token::KwDownTo) {
            true
        } else {
            self.eat(&Token::KwTo);
            false
        };
        let limit = self.parse_expression()?;
        let step = if self.eat(&Token::KwBy) {
            self.parse_expression().ok()
        } else {
            None
        };
        self.eat_semicolon();
        let body = self.parse_statement_list(&[Token::KwEndFor]);
        self.eat(&Token::KwEndFor);
        self.eat_semicolon();
        Ok(Statement::For(ForStmt { var, start, limit, step, downto, body }))
    }

    /// Parse `SELECT; [WHEN cond; ...]* [OTHER; ...] ENDSL;`.
    fn parse_select(&mut self) -> Result<Statement, LowerError> {
        self.advance(); // KwSelect
        self.eat_semicolon();
        let mut whens = Vec::new();
        while self.peek() == &Token::KwWhen {
            self.advance();
            let cond = self.parse_expression()?;
            self.eat_semicolon();
            let body = self.parse_statement_list(&[Token::KwWhen, Token::KwOther, Token::KwEndSl]);
            whens.push(WhenClause { condition: cond, body });
        }
        let other = if self.eat(&Token::KwOther) {
            self.eat_semicolon();
            Some(self.parse_statement_list(&[Token::KwEndSl]))
        } else {
            None
        };
        self.eat(&Token::KwEndSl);
        self.eat_semicolon();
        Ok(Statement::Select(SelectStmt { whens, other }))
    }

    /// Parse `MONITOR; ... [ON-ERROR codes; ...]* ENDMON;`.
    ///
    /// Recognised codes are integer literals, `*PROGRAM`, `*FILE` and `*ALL`;
    /// any other token in the code list is skipped.
    fn parse_monitor(&mut self) -> Result<Statement, LowerError> {
        self.advance(); // KwMonitor
        self.eat_semicolon();
        let body = self.parse_statement_list(&[Token::KwOnError]);
        let mut handlers = Vec::new();
        while self.peek() == &Token::KwOnError {
            self.advance();
            let mut codes = Vec::new();
            while !matches!(self.peek(), Token::Semicolon | Token::Eof) {
                match self.peek().clone() {
                    Token::IntLit(n) => {
                        self.advance();
                        // `tokenize` builds IntLit from digits only, so `n`
                        // is never negative; values above u32::MAX truncate.
                        codes.push(ErrorCode::Integer(n as u32));
                    }
                    Token::KwProgram => { self.advance(); codes.push(ErrorCode::Program); }
                    Token::KwFile => { self.advance(); codes.push(ErrorCode::File); }
                    Token::KwAll => { self.advance(); codes.push(ErrorCode::All); }
                    Token::Colon => { self.advance(); }
                    _ => { self.advance(); }
                }
            }
            self.eat_semicolon();
            let hbody = self.parse_statement_list(&[Token::KwOnError, Token::KwEndMon]);
            handlers.push(OnError { codes, body: hbody });
        }
        self.eat(&Token::KwEndMon);
        self.eat_semicolon();
        Ok(Statement::Monitor(MonitorStmt { body, handlers }))
    }

    /// Parse `EVAL` / `EVALR` / `EVAL-CORR` followed by `target = expr;`.
    /// A parenthesised option group after the opcode is skipped.
    fn parse_assign_eval(&mut self) -> Result<Statement, LowerError> {
        self.advance(); // EVAL / EVALR / EVAL-CORR
        // Optional (H/T/E) option
        if self.peek() == &Token::LParen {
            self.skip_paren_group();
        }
        let target = self.parse_lvalue()?;
        self.expect(&Token::OpEq)?;
        let value = self.parse_expression()?;
        self.eat_semicolon();
        Ok(Statement::Assign(AssignStmt { target, value }))
    }

    /// Parse `CALLP name[(args)];`.
    fn parse_callp(&mut self) -> Result<Statement, LowerError> {
        self.advance(); // KwCallP
        let name = self.expect_name()?;
        let args = if self.peek() == &Token::LParen {
            self.parse_arg_list()?
        } else {
            Vec::new()
        };
        self.eat_semicolon();
        Ok(Statement::CallP(CallPStmt { name, args }))
    }

    /// Parse `CLEAR target;`.
    fn parse_clear(&mut self) -> Result<Statement, LowerError> {
        self.advance(); // KwClear
        let lv = self.parse_lvalue()?;
        self.eat_semicolon();
        Ok(Statement::Clear(lv))
    }

    /// Parse `RESET *ALL;` or `RESET target;`.
    fn parse_reset(&mut self) -> Result<Statement, LowerError> {
        self.advance(); // KwReset
        if self.eat(&Token::KwAll) {
            self.eat_semicolon();
            return Ok(Statement::Reset(ResetStmt::All));
        }
        let lv = self.parse_lvalue()?;
        self.eat_semicolon();
        Ok(Statement::Reset(ResetStmt::Target(lv)))
    }

    // ── I/O statements ─────────────────────────────────────────────────────

    /// Consume an opcode extender such as `(E)` or `(N)` if one is present.
    ///
    /// The group is only taken when it is exactly `(` option `)`. Anything
    /// else — for example a composite key `(k1 : k2)` — is left untouched
    /// and `false` is returned.
    ///
    /// NOTE(review): a single parenthesised identifier key, `(key)`, cannot
    /// be told apart from an extender here and is still consumed as one.
    fn maybe_parse_io_option(&mut self) -> bool {
        if self.peek() != &Token::LParen {
            return false;
        }
        let looks_like_option =
            matches!(self.peek2(), Token::LitE | Token::LitN | Token::Identifier(_));
        let closed = matches!(self.tokens.get(self.pos + 2), Some(Token::RParen));
        if looks_like_option && closed {
            self.advance(); // (
            self.advance(); // option letter
            self.advance(); // )
            true
        } else {
            false
        }
    }

    /// Parse `READ[(opt)] file [indicator];`.
    fn parse_read(&mut self) -> Result<Statement, LowerError> {
        self.advance();
        self.maybe_parse_io_option();
        let file = self.expect_ident()?;
        let _ = self.try_parse_ident(); // optional indicator var
        self.eat_semicolon();
        Ok(Statement::Io(IoStatement::Read { file }))
    }

    /// Parse `READP[(opt)] file;`.
    fn parse_readp(&mut self) -> Result<Statement, LowerError> {
        self.advance();
        self.maybe_parse_io_option();
        let file = self.expect_ident()?;
        self.eat_semicolon();
        Ok(Statement::Io(IoStatement::ReadP { file }))
    }

    /// Parse `READE[(opt)] key file;`. The key is parsed and discarded; the
    /// statement is recorded as a plain `Read`.
    fn parse_reade(&mut self) -> Result<Statement, LowerError> {
        self.advance();
        self.maybe_parse_io_option();
        let key = self.parse_expression()?;
        let file = self.expect_ident()?;
        self.eat_semicolon();
        // Treat as plain READ for now
        let _ = key;
        Ok(Statement::Io(IoStatement::Read { file }))
    }

    /// Parse `READPE[(opt)] key file;`. The key is parsed and discarded; the
    /// statement is recorded as a plain `ReadP`.
    fn parse_readpe(&mut self) -> Result<Statement, LowerError> {
        self.advance();
        self.maybe_parse_io_option();
        let key = self.parse_expression()?;
        let file = self.expect_ident()?;
        self.eat_semicolon();
        let _ = key;
        Ok(Statement::Io(IoStatement::ReadP { file }))
    }

    /// Parse `WRITE[(opt)] record;`.
    fn parse_write(&mut self) -> Result<Statement, LowerError> {
        self.advance();
        self.maybe_parse_io_option();
        let record = self.expect_ident()?;
        self.eat_semicolon();
        Ok(Statement::Io(IoStatement::Write { record }))
    }

    /// Parse `UPDATE[(opt)] record;`.
    fn parse_update(&mut self) -> Result<Statement, LowerError> {
        self.advance();
        self.maybe_parse_io_option();
        let record = self.expect_ident()?;
        self.eat_semicolon();
        Ok(Statement::Io(IoStatement::Update { record }))
    }

    /// Parse `DELETE[(opt)] key file;`.
    fn parse_delete(&mut self) -> Result<Statement, LowerError> {
        self.advance();
        self.maybe_parse_io_option();
        let key = self.parse_expression()?;
        let file = self.expect_ident()?;
        self.eat_semicolon();
        Ok(Statement::Io(IoStatement::Delete { key, file }))
    }

    /// Parse `CHAIN[(opt)] key file;`.
    fn parse_chain(&mut self) -> Result<Statement, LowerError> {
        self.advance();
        self.maybe_parse_io_option();
        let key = self.parse_expression()?;
        let file = self.expect_ident()?;
        self.eat_semicolon();
        Ok(Statement::Io(IoStatement::Chain { key, file }))
    }

    /// Parse `SETLL (*START | *END | key) file;`.
    fn parse_setll(&mut self) -> Result<Statement, LowerError> {
        self.advance();
        let key = match self.peek().clone() {
            Token::KwStart => { self.advance(); SetKey::Start }
            Token::KwEnd => { self.advance(); SetKey::End }
            _ => SetKey::Expr(self.parse_expression()?),
        };
        let file = self.expect_ident()?;
        self.eat_semicolon();
        Ok(Statement::Io(IoStatement::SetLL { key, file }))
    }

    fn parse_setgt(&mut self) -> Result<Statement, LowerError> {
        self.advance();
        let key = match self.peek().clone() {
Token::KwStart => { self.advance(); SetKey::Start }
+            Token::KwEnd => { self.advance(); SetKey::End }
+            _ => SetKey::Expr(self.parse_expression()?),
+        };
+        let file = self.expect_ident()?;
+        self.eat_semicolon();
+        Ok(Statement::Io(IoStatement::SetGT { key, file }))
+    }
+
+    /// Parses `OPEN[(opt)] file;`.
+    fn parse_open(&mut self) -> Result<Statement, LowerError> {
+        self.advance();
+        self.maybe_parse_io_option();
+        let file = self.expect_ident()?;
+        self.eat_semicolon();
+        Ok(Statement::Io(IoStatement::Open { file }))
+    }
+
+    /// Parses `CLOSE[(opt)] file;` or `CLOSE[(opt)] *ALL;` (lowered with
+    /// `file: None`).
+    fn parse_close(&mut self) -> Result<Statement, LowerError> {
+        self.advance();
+        self.maybe_parse_io_option();
+        if self.eat(&Token::KwAll) {
+            self.eat_semicolon();
+            return Ok(Statement::Io(IoStatement::Close { file: None }));
+        }
+        let file = self.expect_ident()?;
+        self.eat_semicolon();
+        Ok(Statement::Io(IoStatement::Close { file: Some(file) }))
+    }
+
+    /// Parses `EXCEPT [format];`.
+    fn parse_except(&mut self) -> Result<Statement, LowerError> {
+        self.advance();
+        let fmt = self.try_parse_ident();
+        self.eat_semicolon();
+        Ok(Statement::Io(IoStatement::Except { format: fmt }))
+    }
+
+    /// Parses `EXFMT[(opt)] format;`.
+    fn parse_exfmt(&mut self) -> Result<Statement, LowerError> {
+        self.advance();
+        self.maybe_parse_io_option();
+        let format = self.expect_ident()?;
+        self.eat_semicolon();
+        Ok(Statement::Io(IoStatement::ExFmt { format }))
+    }
+
+    // ── Identifier statement (assignment or implicit call) ─────────────────
+
+    /// Parses a statement that begins with a name.
+    ///
+    /// Recognised forms:
+    /// * `name(args);`                implicit procedure call
+    /// * `name(idx) = expr;`          subscript assignment
+    /// * `name.field[(idx)] = expr;`  qualified assignment
+    /// * `name = expr;`               plain assignment
+    ///
+    /// # Errors
+    /// Returns an error when the name is followed by none of the above, or
+    /// when a sub-parser fails.
+    fn parse_ident_stmt(&mut self) -> Result<Statement, LowerError> {
+        // Remember where the name starts: the assignment forms rewind here so
+        // that `parse_lvalue`, which expects to read the name itself, sees the
+        // complete target.
+        let start = self.pos;
+        let name = self.expect_name()?;
+
+        // Could be `name(args);` — an implicit procedure call
+        if self.peek() == &Token::LParen {
+            // Peek ahead to decide: call or subscript-assignment?
+            // If after the matching ')' we see '=' it's an assignment, else call.
+            let saved = self.pos;
+            self.skip_paren_group();
+            let is_assign = self.peek() == &Token::OpEq;
+            self.pos = saved; // rewind to '('
+
+            if !is_assign {
+                // implicit call
+                let args = self.parse_arg_list()?;
+                self.eat_semicolon();
+                return Ok(Statement::CallP(CallPStmt { name, args }));
+            }
+        }
+
+        // Subscript assignment `name(idx) = expr;` or qualified assignment
+        // `name.field = expr;` — both are handled by `parse_lvalue`.
+        if matches!(self.peek(), Token::LParen | Token::Dot) {
+            self.pos = start;
+            let target = self.parse_lvalue()?;
+            self.expect(&Token::OpEq)?;
+            let value = self.parse_expression()?;
+            self.eat_semicolon();
+            return Ok(Statement::Assign(AssignStmt { target, value }));
+        }
+
+        // Plain `name = expr;`
+        if self.eat(&Token::OpEq) {
+            let value = self.parse_expression()?;
+            self.eat_semicolon();
+            let target = LValue::Name(QualifiedName::simple(name));
+            return Ok(Statement::Assign(AssignStmt { target, value }));
+        }
+
+        Err(LowerError::new(format!("cannot parse statement starting with identifier '{}'", name)))
+    }
+
+    // ── L-values ───────────────────────────────────────────────────────────
+
+    /// Parses an assignment target: `name[.name]*` optionally followed by
+    /// `(idx[:idx]*)`.
+    ///
+    /// A missing closing `)` is tolerated.
+    fn parse_lvalue(&mut self) -> Result<LValue, LowerError> {
+        let name = self.expect_name()?;
+        let mut parts = vec![name];
+        while self.eat(&Token::Dot) {
+            parts.push(self.expect_name()?);
+        }
+        let qname = QualifiedName { parts };
+        if self.peek() == &Token::LParen {
+            let mut indices = Vec::new();
+            self.advance();
+            indices.push(self.parse_expression()?);
+            while self.eat(&Token::Colon) {
+                indices.push(self.parse_expression()?);
+            }
+            self.eat(&Token::RParen);
+            Ok(LValue::Index(qname, indices))
+        } else {
+            Ok(LValue::Name(qname))
+        }
+    }
+
+    // ── Expressions ────────────────────────────────────────────────────────
+
+    /// Parses a full expression (entry point of the precedence chain).
+    fn parse_expression(&mut self) -> Result<Expression, LowerError> {
+        self.parse_or_expr()
+    }
+
+    /// Parses a left-associative chain of `OR` operands.
+    fn parse_or_expr(&mut self) -> Result<Expression, LowerError> {
+        let mut lhs = self.parse_and_expr()?;
+        while self.eat(&Token::KwOr) {
+            let rhs = self.parse_and_expr()?;
+            lhs = Expression::BinOp(BinOp::Or, Box::new(lhs), Box::new(rhs));
+        }
+        Ok(lhs)
+    }
+
+    fn parse_and_expr(&mut self) -> Result<Expression, LowerError> {
+        let mut lhs = self.parse_not_expr()?;
+        while self.eat(&Token::KwAnd) {
+            let rhs =
self.parse_not_expr()?;
+            lhs = Expression::BinOp(BinOp::And, Box::new(lhs), Box::new(rhs));
+        }
+        Ok(lhs)
+    }
+
+    /// Parses an optional `NOT` prefix followed by a comparison.
+    fn parse_not_expr(&mut self) -> Result<Expression, LowerError> {
+        if self.eat(&Token::KwNot) {
+            let expr = self.parse_comparison_expr()?;
+            return Ok(Expression::Not(Box::new(expr)));
+        }
+        self.parse_comparison_expr()
+    }
+
+    /// Parses `additive [cmp additive]`; comparisons do not chain.
+    fn parse_comparison_expr(&mut self) -> Result<Expression, LowerError> {
+        let lhs = self.parse_additive_expr()?;
+        let op = match self.peek() {
+            Token::OpEq => BinOp::Eq,
+            Token::OpNe => BinOp::Ne,
+            Token::OpLt => BinOp::Lt,
+            Token::OpLe => BinOp::Le,
+            Token::OpGt => BinOp::Gt,
+            Token::OpGe => BinOp::Ge,
+            _ => return Ok(lhs),
+        };
+        self.advance();
+        let rhs = self.parse_additive_expr()?;
+        Ok(Expression::BinOp(op, Box::new(lhs), Box::new(rhs)))
+    }
+
+    /// Parses a left-associative chain of binary `+` / `-`.
+    fn parse_additive_expr(&mut self) -> Result<Expression, LowerError> {
+        let mut lhs = self.parse_multiplicative_expr()?;
+        loop {
+            let op = match self.peek() {
+                Token::OpPlus => BinOp::Add,
+                Token::OpMinus => BinOp::Sub,
+                _ => break,
+            };
+            self.advance();
+            let rhs = self.parse_multiplicative_expr()?;
+            lhs = Expression::BinOp(op, Box::new(lhs), Box::new(rhs));
+        }
+        Ok(lhs)
+    }
+
+    /// Parses a left-associative chain of `*` / `/`.
+    ///
+    /// Operands are power expressions, so `**` binds tighter than `*` and
+    /// `/`: `a * b ** c` is `a * (b ** c)`.
+    fn parse_multiplicative_expr(&mut self) -> Result<Expression, LowerError> {
+        let mut lhs = self.parse_power_expr()?;
+        loop {
+            let op = match self.peek() {
+                Token::OpStar => BinOp::Mul,
+                Token::OpSlash => BinOp::Div,
+                _ => break,
+            };
+            self.advance();
+            let rhs = self.parse_power_expr()?;
+            lhs = Expression::BinOp(op, Box::new(lhs), Box::new(rhs));
+        }
+        Ok(lhs)
+    }
+
+    /// Parses `unary [** power]`.
+    ///
+    /// The exponent is parsed recursively, which makes `**` right-associative:
+    /// `a ** b ** c` is `a ** (b ** c)`.
+    fn parse_power_expr(&mut self) -> Result<Expression, LowerError> {
+        let base = self.parse_unary_expr()?;
+        if self.eat(&Token::OpStar2) {
+            let exponent = self.parse_power_expr()?;
+            return Ok(Expression::BinOp(BinOp::Pow, Box::new(base), Box::new(exponent)));
+        }
+        Ok(base)
+    }
+
+    /// Parses an optional unary `-` / `+` applied to a primary expression.
+    fn parse_unary_expr(&mut self) -> Result<Expression, LowerError> {
+        if self.eat(&Token::OpMinus) {
+            let e = self.parse_primary_expr()?;
+            return Ok(Expression::UnaryMinus(Box::new(e)));
+        }
+        if self.eat(&Token::OpPlus) {
+            let e = self.parse_primary_expr()?;
+            return Ok(Expression::UnaryPlus(Box::new(e)));
+        }
+        self.parse_primary_expr()
+    }
+
+    /// Parses a primary expression: parenthesised expression, literal, named
+    /// constant, special value, built-in function, or a (qualified) name with
+    /// optional call arguments / subscripts.
+    ///
+    /// # Errors
+    /// Returns an error for any token that cannot start an expression.
+    fn parse_primary_expr(&mut self) -> Result<Expression, LowerError> {
+        match self.peek().clone() {
+            // Parenthesised expression
+            Token::LParen => {
+                self.advance();
+                let e = self.parse_expression()?;
+                self.eat(&Token::RParen);
+                Ok(Expression::Paren(Box::new(e)))
+            }
+
+            // String literal
+            Token::StringLit(s) => {
+                self.advance();
+                Ok(Expression::Literal(Literal::String(s)))
+            }
+
+            // Integer literal
+            Token::IntLit(n) => {
+                self.advance();
+                Ok(Expression::Literal(Literal::Integer(n)))
+            }
+
+            // Float literal
+            Token::FloatLit(f) => {
+                self.advance();
+                Ok(Expression::Literal(Literal::Float(f)))
+            }
+
+            // Hex literal
+            Token::HexLit(h) => {
+                self.advance();
+                let bytes = hex_to_bytes(&h);
+                Ok(Expression::Literal(Literal::Hex(bytes)))
+            }
+
+            // Named constants
+            Token::KwOn | Token::KwOff | Token::KwBlank | Token::KwBlanks |
+            Token::KwZero | Token::KwZeros | Token::KwHiVal | Token::KwLoVal |
+            Token::KwNull => {
+                let nc = self.parse_named_constant()?;
+                Ok(Expression::Named(nc))
+            }
+
+            // *IN(n) special
+            Token::KwIn2 => {
+                self.advance();
+                if self.peek() == &Token::LParen {
+                    self.advance();
+                    let e = self.parse_expression()?;
+                    self.eat(&Token::RParen);
+                    Ok(Expression::Special(SpecialValue::In(Box::new(e))))
+                } else {
+                    Ok(Expression::Special(SpecialValue::InAll))
+                }
+            }
+
+            // Other special values
+            Token::KwStart => { self.advance(); Ok(Expression::Special(SpecialValue::Start)) }
+            Token::KwEnd => { self.advance(); Ok(Expression::Special(SpecialValue::End)) }
+            Token::KwOmit => { self.advance(); Ok(Expression::Special(SpecialValue::Omit)) }
+            Token::KwThis => { self.advance(); Ok(Expression::Special(SpecialValue::This)) }
+            Token::KwSame => { self.advance(); Ok(Expression::Special(SpecialValue::Same)) }
+            Token::KwAll => {
+                self.advance();
+                // *ALL'str'
+                if let Token::StringLit(s) = self.peek().clone() {
+                    self.advance();
+                    Ok(Expression::Special(SpecialValue::All(s)))
+                } else {
+                    // NOTE(review): a bare *ALL is lowered as *BLANKS — confirm
+                    // this fallback is intended.
+                    Ok(Expression::Special(SpecialValue::Blanks))
+                }
+            }
+
+            // Built-in functions
+            tok if is_bif_token(&tok) => {
+                self.parse_builtin_expr()
+            }
+
+            // Identifier (or keyword used as a name) — variable reference,
+            // qualified name, or call.
+            ref tok if token_as_name(tok).is_some() => {
+                // The guard above established that the current token has a name.
+                let name = token_as_name(self.peek()).unwrap();
+                self.advance();
+                // Qualified name (dots)?
+                let mut parts = vec![name.clone()];
+                while self.eat(&Token::Dot) {
+                    if let Some(s) = self.try_parse_ident_or_name() {
+                        parts.push(s);
+                    } else {
+                        break;
+                    }
+                }
+                let qname = QualifiedName { parts };
+                // Call `name(args)` or subscript `name(idx)`?
+                if self.peek() == &Token::LParen && qname.is_simple() {
+                    let args = self.parse_arg_list()?;
+                    Ok(Expression::Call(name, args))
+                } else if self.peek() == &Token::LParen {
+                    let mut indices = Vec::new();
+                    self.advance();
+                    indices.push(self.parse_expression()?);
+                    while self.eat(&Token::Colon) {
+                        indices.push(self.parse_expression()?);
+                    }
+                    self.eat(&Token::RParen);
+                    Ok(Expression::Index(qname, indices))
+                } else {
+                    Ok(Expression::Variable(qname))
+                }
+            }
+
+            tok => {
+                Err(LowerError::new(format!("unexpected token in expression: {:?}", tok)))
+            }
+        }
+    }
+
+    /// Parses one of the figurative constants (`*ON`, `*OFF`, `*BLANK`, ...).
+    ///
+    /// The token is consumed only when it is a named constant.
+    fn parse_named_constant(&mut self) -> Result<NamedConstant, LowerError> {
+        let nc = match self.peek() {
+            Token::KwOn => NamedConstant::On,
+            Token::KwOff => NamedConstant::Off,
+            Token::KwBlank => NamedConstant::Blank,
+            Token::KwBlanks => NamedConstant::Blanks,
+            Token::KwZero => NamedConstant::Zero,
+            Token::KwZeros => NamedConstant::Zeros,
+            Token::KwHiVal => NamedConstant::HiVal,
+            Token::KwLoVal => NamedConstant::LoVal,
+            Token::KwNull => NamedConstant::Null,
+            tok => return Err(LowerError::new(format!("expected named constant, got {:?}", tok))),
+        };
+        self.advance();
+        Ok(nc)
+    }
+
+    fn parse_literal(&mut self) -> Result<Literal, LowerError> {
+        match self.peek().clone() {
+            Token::StringLit(s) => { self.advance(); Ok(Literal::String(s)) }
+            Token::IntLit(n) => { self.advance(); Ok(Literal::Integer(n)) }
+            Token::FloatLit(f) => { self.advance(); Ok(Literal::Float(f)) }
+            Token::HexLit(h) => { self.advance(); Ok(Literal::Hex(hex_to_bytes(&h))) }
+            Token::KwOn => { self.advance(); Ok(Literal::Indicator(true)) }
+            Token::KwOff => {
self.advance(); Ok(Literal::Indicator(false)) }
+            tok => Err(LowerError::new(format!("expected literal, got {:?}", tok))),
+        }
+    }
+
+    /// Parses a built-in function reference; the BIF token is current on entry.
+    ///
+    /// BIFs that may be written without an argument list (`%EOF`, `%FOUND`,
+    /// `%ERROR`, `%EQUAL`, `%STATUS`, `%PARMS`, `%SHTDN`, `%THIS`) are accepted
+    /// bare. Every other BIF requires `(`. Missing `:` separators and a
+    /// missing closing `)` are tolerated.
+    ///
+    /// # Errors
+    /// Returns an error when a BIF that needs arguments is not followed by
+    /// `(`, or when an argument expression fails to parse.
+    fn parse_builtin_expr(&mut self) -> Result<Expression, LowerError> {
+        let bif_tok = self.advance();
+
+        // Bare form, e.g. `IF %ERROR;` or `DOW NOT %EOF;`.
+        if self.peek() != &Token::LParen {
+            let bare = match &bif_tok {
+                Token::BifEof => Some(BuiltIn::Eof(None)),
+                Token::BifFound => Some(BuiltIn::Found(None)),
+                Token::BifError => Some(BuiltIn::Error),
+                Token::BifEqual | Token::BifStatus | Token::BifParms |
+                Token::BifShtDn | Token::BifThis => {
+                    Some(BuiltIn::Other(format!("{:?}", bif_tok), Vec::new()))
+                }
+                _ => None,
+            };
+            if let Some(bif) = bare {
+                return Ok(Expression::BuiltIn(bif));
+            }
+        }
+
+        self.expect(&Token::LParen)?;
+        let bif = match bif_tok {
+            Token::BifLen => BuiltIn::Len(self.parse_bif_single_arg()?),
+            Token::BifTrim => BuiltIn::Trim(self.parse_bif_single_arg()?),
+            Token::BifTrimL => BuiltIn::TrimL(self.parse_bif_single_arg()?),
+            Token::BifTrimR => BuiltIn::TrimR(self.parse_bif_single_arg()?),
+            Token::BifChar => BuiltIn::Char(self.parse_bif_single_arg()?),
+            Token::BifInt => BuiltIn::Int(self.parse_bif_single_arg()?),
+            Token::BifAbs => BuiltIn::Abs(self.parse_bif_single_arg()?),
+            Token::BifSqrt => BuiltIn::Sqrt(self.parse_bif_single_arg()?),
+            Token::BifEof => {
+                let name = self.try_parse_ident();
+                self.eat(&Token::RParen);
+                BuiltIn::Eof(name)
+            }
+            Token::BifFound => {
+                let name = self.try_parse_ident();
+                self.eat(&Token::RParen);
+                BuiltIn::Found(name)
+            }
+            Token::BifError => {
+                self.eat(&Token::RParen);
+                BuiltIn::Error
+            }
+            Token::BifSize => BuiltIn::Size(self.parse_bif_single_arg()?),
+            Token::BifAddr => BuiltIn::Addr(self.parse_bif_single_arg()?),
+            Token::BifAlloc => BuiltIn::Alloc(self.parse_bif_single_arg()?),
+            Token::BifRem => {
+                let a = self.parse_expression()?;
+                self.eat(&Token::Colon);
+                let b = self.parse_expression()?;
+                self.eat(&Token::RParen);
+                BuiltIn::Rem(Box::new(a), Box::new(b))
+            }
+            Token::BifDiv => {
+                let a = self.parse_expression()?;
+                self.eat(&Token::Colon);
+                let b = self.parse_expression()?;
+                self.eat(&Token::RParen);
+                BuiltIn::Div(Box::new(a), Box::new(b))
+            }
+            Token::BifDec => {
+                let e = self.parse_expression()?;
+                self.eat(&Token::Colon);
+                let d = self.parse_expression()?;
+                self.eat(&Token::Colon);
+                let f = self.parse_expression()?;
+                self.eat(&Token::RParen);
+                BuiltIn::Dec(Box::new(e), Box::new(d), Box::new(f))
+            }
+            Token::BifSubst => {
+                let s = self.parse_expression()?;
+                self.eat(&Token::Colon);
+                let start = self.parse_expression()?;
+                let len = if self.eat(&Token::Colon) {
+                    Some(Box::new(self.parse_expression()?))
+                } else {
+                    None
+                };
+                self.eat(&Token::RParen);
+                BuiltIn::Subst(Box::new(s), Box::new(start), len)
+            }
+            Token::BifScan => {
+                let p = self.parse_expression()?;
+                self.eat(&Token::Colon);
+                let src = self.parse_expression()?;
+                let start = if self.eat(&Token::Colon) {
+                    Some(Box::new(self.parse_expression()?))
+                } else {
+                    None
+                };
+                self.eat(&Token::RParen);
+                BuiltIn::Scan(Box::new(p), Box::new(src), start)
+            }
+            tok => {
+                // Generic BIF: collect all arguments
+                let mut args = Vec::new();
+                while !matches!(self.peek(), Token::RParen | Token::Eof) {
+                    if let Ok(e) = self.parse_expression() {
+                        args.push(e);
+                    } else {
+                        // Skip the offending token and keep going.
+                        self.advance();
+                    }
+                    if !self.eat(&Token::Colon) { break; }
+                }
+                self.eat(&Token::RParen);
+                BuiltIn::Other(format!("{:?}", tok), args)
+            }
+        };
+        Ok(Expression::BuiltIn(bif))
+    }
+
+    /// Parses the sole argument of a one-argument BIF and the closing `)`
+    /// (a missing `)` is tolerated).
+    fn parse_bif_single_arg(&mut self) -> Result<Box<Expression>, LowerError> {
+        let e = self.parse_expression()?;
+        self.eat(&Token::RParen);
+        Ok(Box::new(e))
+    }
+
+    // ── Argument lists ─────────────────────────────────────────────────────
+
+    fn parse_arg_list(&mut self) -> Result<Vec<Arg>, LowerError> {
+        self.expect(&Token::LParen)?;
+        let mut args = Vec::new();
+        if self.peek() == &Token::RParen {
+            self.advance();
+            return Ok(args);
+        }
+        loop {
+            if self.eat(&Token::KwOmit) {
+                args.push(Arg::Omit);
+            } else {
+                let e = self.parse_expression()?;
+                args.push(Arg::Expr(e));
+            }
+            if
!self.eat(&Token::Colon) { break; }
+        }
+        self.eat(&Token::RParen);
+        Ok(args)
+    }
+
+    // ── Helpers ────────────────────────────────────────────────────────────
+
+    /// Consumes the next token and returns its text if it is an identifier.
+    ///
+    /// The token is consumed even when it is not an identifier.
+    fn expect_ident(&mut self) -> Result<String, LowerError> {
+        match self.advance() {
+            Token::Identifier(s) => Ok(s),
+            tok => Err(LowerError::new(format!("expected identifier, got {:?}", tok))),
+        }
+    }
+
+    /// Like `expect_ident` but also accepts keyword tokens as names.
+    ///
+    /// RPG IV procedure names and variable names can collide with keywords
+    /// (e.g. `main`, `read`, `write`, `open`, `close`, `date`, `time`).
+    /// This helper converts any single-word keyword token back to its string
+    /// representation so it can be used as an identifier.
+    fn expect_name(&mut self) -> Result<String, LowerError> {
+        let tok = self.advance();
+        match token_as_name(&tok) {
+            Some(name) => Ok(name),
+            None => Err(LowerError::new(format!("expected name, got {:?}", tok))),
+        }
+    }
+
+    /// Consumes the current token if `token_as_name` accepts it; otherwise
+    /// leaves the position untouched and returns `None`.
+    fn try_parse_name(&mut self) -> Option<String> {
+        let s = token_as_name(self.peek())?;
+        self.advance();
+        Some(s)
+    }
+
+    /// Consumes the current token if it is a plain identifier; otherwise
+    /// leaves the position untouched and returns `None`.
+    fn try_parse_ident(&mut self) -> Option<String> {
+        if let Token::Identifier(s) = self.peek().clone() {
+            self.advance();
+            Some(s)
+        } else {
+            None
+        }
+    }
+
+    /// Try to parse an identifier OR a keyword-as-name (like `main`, `read`).
+    fn try_parse_ident_or_name(&mut self) -> Option<String> {
+        // `token_as_name` already returns identifiers unchanged, so no
+        // separate identifier check is needed.
+        self.try_parse_name()
+    }
+
+    /// Skips a balanced `( ... )` group starting at the current token.
+    ///
+    /// Does nothing when the current token is not `(`; stops at end of input
+    /// if the group is never closed.
+    fn skip_paren_group(&mut self) {
+        if self.peek() != &Token::LParen { return; }
+        self.advance();
+        let mut depth = 1;
+        while depth > 0 && !self.is_eof() {
+            match self.peek() {
+                Token::LParen => { self.advance(); depth += 1; }
+                Token::RParen => { self.advance(); depth -= 1; }
+                _ => { self.advance(); }
+            }
+        }
+    }
+}
+
+// ─────────────────────────────────────────────────────────────────────────────
+// Helper functions
+// ─────────────────────────────────────────────────────────────────────────────
+
+/// Try to interpret any single-word token as a plain name string.
+///
+/// This is needed because RPG IV procedure names and variable names can
+/// collide with keywords (e.g. `main`, `read`, `time`, `date`, `open`).
+/// Identifiers always win; for keyword tokens we return the canonical
+/// lowercase or mixed-case spelling that the source would have used.
+///
+/// Returns `None` for every token that is neither an identifier nor one of
+/// the keywords listed below.
+fn token_as_name(tok: &Token) -> Option<String> {
+    let name: &str = match tok {
+        Token::Identifier(s) => return Some(s.clone()),
+
+        // Statement / declaration keywords that are commonly used as names.
+        Token::KwMain => "main",
+        Token::KwRead => "read",
+        Token::KwWrite => "write",
+        Token::KwOpen => "open",
+        Token::KwClose => "close",
+        Token::KwDelete => "delete",
+        Token::KwUpdate => "update",
+        Token::KwDate => "date",
+        Token::KwTime => "time",
+        Token::KwTimestamp => "timestamp",
+        Token::KwChar => "char",
+        Token::KwInt => "int",
+        Token::KwFloat => "float",
+        Token::KwInd => "ind",
+        Token::KwPointer => "pointer",
+        Token::KwText => "text",
+        Token::KwOption => "option",
+        Token::KwExport => "export",
+        Token::KwForce => "force",
+        Token::KwPost => "post",
+        Token::KwFeod => "feod",
+        Token::KwUnlock => "unlock",
+        Token::KwSortA => "sorta",
+        Token::KwDump => "dump",
+        Token::KwReset => "reset",
+        Token::KwClear => "clear",
+        Token::KwLeave => "leave",
+        Token::KwIter => "iter",
+        Token::KwReturn => "return",
+        Token::KwSelect => "select",
+        Token::KwWhen => "when",
+        Token::KwOther => "other",
+        Token::KwMonitor => "monitor",
+        Token::KwFor => "for",
+        Token::KwTo => "to",
+        Token::KwBy => "by",
+        Token::KwDownTo => "downto",
+        Token::KwDsply => "dsply",
+        Token::KwCallP => "callp",
+        Token::KwExSr => "exsr",
+        Token::KwExFmt => "exfmt",
+        Token::KwExcept => "except",
+        Token::KwChain => "chain",
+        Token::KwSetLL => "setll",
+        Token::KwSetGT => "setgt",
+        Token::KwReadP => "readp",
+        Token::KwReadE => "reade",
+        Token::KwReadPE => "readpe",
+        Token::KwCommit => "commit",
+        Token::KwRollback => "rollback",
+        Token::KwDeAlloc => "dealloc",
+        Token::KwIf => "if",
+        Token::KwElse => "else",
+        Token::KwElseIf => "elseif",
+        Token::KwEndIf => "endif",
+        Token::KwDow => "dow",
+        Token::KwDou => "dou",
+        Token::KwEndDo => "enddo",
+        Token::KwEndFor => "endfor",
+        Token::KwEndSl => "endsl",
+        Token::KwEndMon => "endmon",
+        Token::KwEval => "eval",
+        Token::KwEvalR => "evalr",
+        Token::KwEvalCorr => "eval-corr",
+        Token::KwLeaveSr => "leavesr",
+        Token::KwNoMain => "nomain",
+        Token::KwDftActGrp => "dftactgrp",
+        Token::KwActGrp => "actgrp",
+        Token::KwBndDir => "bnddir",
+        Token::KwCopyright => "copyright",
+        Token::KwDebug => "debug",
+        Token::KwExpOpts => "expropts",
+        Token::KwDatFmt => "datfmt",
+        Token::KwTimFmt => "timfmt",
+        Token::KwDecEdit => "decedit",
+        Token::KwAlwNull => "alwnull",
+        Token::KwStgMdl => "stgmdl",
+        Token::KwTruncNbr => "truncnbr",
+        Token::KwInz => "inz",
+        Token::KwConst => "const",
+        Token::KwValue => "value",
+        Token::KwStatic => "static",
+        Token::KwBased => "based",
+        Token::KwDim => "dim",
+        Token::KwAscend => "ascend",
+        Token::KwDescend => "descend",
+        Token::KwAltSeq => "altseq",
+        Token::KwOpDesc => "opdesc",
+        Token::KwNoOpt => "noopt",
+        Token::KwVolatile => "volatile",
+        Token::KwOptions => "options",
+        Token::KwQualified => "qualified",
+        Token::KwTemplate => "template",
+        Token::KwExtName => "extname",
+        Token::KwExtProc => "extproc",
+        Token::KwLike => "like",
+        Token::KwLikeDs => "likeds",
+        Token::KwLikeRec => "likerec",
+        Token::KwVarChar => "varchar",
+        Token::KwGraph => "graph",
+        Token::KwVarGraph => "vargraph",
+        Token::KwUcs2 => "ucs2",
+        Token::KwVarUcs2 => "varucs2",
+        Token::KwPacked => "packed",
+        Token::KwZoned => "zoned",
+        Token::KwBindec => "bindec",
+        Token::KwUns => "uns",
+        Token::KwObject => "object",
+        Token::KwProcPtr => "procptr",
+        Token::KwLikeRec2 => "likerec",
+        _ => return None,
+    };
+    Some(name.into())
+}
+
+fn is_bif_token(tok: &Token) -> bool {
+    matches!(tok,
+        Token::BifAbs | Token::BifAddr | Token::BifAlloc | Token::BifBitAnd |
+        Token::BifBitNot | Token::BifBitOr | Token::BifBitXor | Token::BifChar |
+        Token::BifCheck | Token::BifCheckR | Token::BifDate | Token::BifDays |
+        Token::BifDec | Token::BifDecH | Token::BifDecPos | Token::BifDiff |
+        Token::BifDiv | Token::BifEditC | Token::BifEditFlt | Token::BifEditW |
+        Token::BifElem | Token::BifEof | Token::BifEqual | Token::BifError |
+        Token::BifFields | Token::BifFloat | Token::BifFound | Token::BifGraph |
+        Token::BifHours | Token::BifInt | Token::BifIntH | Token::BifKds |
+        Token::BifLen | Token::BifMinutes | Token::BifMonths | Token::BifMSeconds |
+        Token::BifNullInd | Token::BifOccur | Token::BifOpen | Token::BifPAddr |
+        Token::BifParms | Token::BifReAlloc | Token::BifRem | Token::BifReplace |
+        Token::BifScan | Token::BifScanR | Token::BifSeconds | Token::BifShtDn |
+        Token::BifSize | Token::BifSqrt | Token::BifStatus | Token::BifStr |
+        Token::BifSubArr | Token::BifSubst | Token::BifThis | Token::BifTime |
+        Token::BifTimestamp | Token::BifTrim | Token::BifTrimL | Token::BifTrimR |
+        Token::BifUcs2 | Token::BifUns | Token::BifUnsH | Token::BifXFoot
|
+        Token::BifXLate | Token::BifYears
+    )
+}
+
+/// Decodes a string of hexadecimal digits into bytes, two digits per byte.
+///
+/// Decoding is lenient: a chunk that is not valid UTF-8 or not valid
+/// hexadecimal is dropped, and a trailing single digit (odd-length input) is
+/// decoded as a byte on its own.
+fn hex_to_bytes(h: &str) -> Vec<u8> {
+    h.as_bytes()
+        .chunks(2)
+        .filter_map(|c| {
+            let s = std::str::from_utf8(c).ok()?;
+            u8::from_str_radix(s, 16).ok()
+        })
+        .collect()
+}
+
+// ─────────────────────────────────────────────────────────────────────────────
+// Unit tests
+// ─────────────────────────────────────────────────────────────────────────────
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    fn lower_ok(src: &str) -> Program {
+        lower(src).expect("lower should succeed")
+    }
+
+    #[test]
+    fn tokenize_ctl_opt() {
+        let tokens = tokenize("CTL-OPT DFTACTGRP(*NO);").unwrap();
+        assert!(tokens.contains(&Token::KwCtlOpt));
+        assert!(tokens.contains(&Token::KwDftActGrp));
+        assert!(tokens.contains(&Token::KwNo));
+    }
+
+    #[test]
+    fn tokenize_string_literal() {
+        let tokens = tokenize("'Hello, World!'").unwrap();
+        assert!(tokens.contains(&Token::StringLit("Hello, World!".into())));
+    }
+
+    #[test]
+    fn tokenize_embedded_quote() {
+        let tokens = tokenize("'it''s'").unwrap();
+        assert!(tokens.contains(&Token::StringLit("it's".into())));
+    }
+
+    #[test]
+    fn tokenize_star_constants() {
+        let tokens = tokenize("*ON *OFF *BLANK *BLANKS *NULL").unwrap();
+        assert!(tokens.contains(&Token::KwOn));
+        assert!(tokens.contains(&Token::KwOff));
+        assert!(tokens.contains(&Token::KwBlank));
+        assert!(tokens.contains(&Token::KwBlanks));
+        assert!(tokens.contains(&Token::KwNull));
+    }
+
+    #[test]
+    fn tokenize_bif() {
+        let tokens = tokenize("%TRIM(name)").unwrap();
+        assert!(tokens.contains(&Token::BifTrim));
+    }
+
+    #[test]
+    fn lower_ctl_opt() {
+        let p = lower_ok("CTL-OPT DFTACTGRP(*NO);");
+        assert_eq!(p.declarations.len(), 1);
+        if let Declaration::ControlSpec(cs) = &p.declarations[0] {
+            assert!(cs.keywords.iter().any(|k| matches!(k, CtlKeyword::DftActGrp(false))));
+        } else {
+            panic!("expected ControlSpec");
+        }
+    }
+
+    #[test]
+    fn lower_ctl_opt_nomain() {
+        let p = lower_ok("CTL-OPT NOMAIN;");
+        if let Declaration::ControlSpec(cs) = &p.declarations[0] {
+            assert!(cs.keywords.iter().any(|k| matches!(k, CtlKeyword::NoMain)));
+        } else {
+            // Without this branch the test would pass for any other variant.
+            panic!("expected ControlSpec");
+        }
+    }
+
+    #[test]
+    fn lower_dcl_s_char_inz() {
+        let p = lower_ok("DCL-S greeting CHAR(25) INZ('Hello, World!');");
+        assert_eq!(p.declarations.len(), 1);
+        if let Declaration::Standalone(decl) = &p.declarations[0] {
+            assert_eq!(decl.name, "greeting");
+            assert!(matches!(decl.ty, TypeSpec::Char(_)));
+            assert!(decl.keywords.iter().any(|k| matches!(k, VarKeyword::InzExpr(_))));
+        } else {
+            panic!("expected Standalone");
+        }
+    }
+
+    #[test]
+    fn lower_dcl_s_int() {
+        let p = lower_ok("DCL-S counter INT(10) INZ(0);");
+        if let Declaration::Standalone(decl) = &p.declarations[0] {
+            assert_eq!(decl.name, "counter");
+            assert!(matches!(decl.ty, TypeSpec::Int(_)));
+        } else {
+            panic!("expected Standalone");
+        }
+    }
+
+    #[test]
+    fn lower_dcl_c() {
+        let p = lower_ok("DCL-C MAX_SIZE CONST(100);");
+        if let Declaration::Constant(c) = &p.declarations[0] {
+            assert_eq!(c.name, "MAX_SIZE");
+        } else {
+            panic!("expected Constant");
+        }
+    }
+
+    #[test]
+    fn lower_dcl_proc_export() {
+        let src = "DCL-PROC main EXPORT;\n RETURN;\nEND-PROC;";
+        let p = lower_ok(src);
+        assert_eq!(p.procedures.len(), 1);
+        let proc = &p.procedures[0];
+        assert_eq!(proc.name, "main");
+        assert!(proc.exported);
+        assert_eq!(proc.body.len(), 1);
+        assert!(matches!(proc.body[0], Statement::Return(_)));
+    }
+
+    #[test]
+    fn lower_dsply() {
+        let src = "DCL-PROC main EXPORT;\n DSPLY greeting;\n RETURN;\nEND-PROC;";
+        let p = lower_ok(src);
+        let proc = &p.procedures[0];
+        assert!(matches!(proc.body[0], Statement::Dsply(_)));
+    }
+
+    #[test]
+    fn lower_hello_rpg() {
+        let hello = include_str!("../hello.rpg");
+        let p = lower_ok(hello);
+        assert!(!p.procedures.is_empty(), "should have at least one procedure");
+        let proc = p.procedures.iter().find(|p| p.name == "main").expect("main proc");
+        assert!(proc.exported);
+        // Should have DSPLY and RETURN
+        assert!(proc.body.iter().any(|s| matches!(s, Statement::Dsply(_))));
+        assert!(proc.body.iter().any(|s|
matches!(s, Statement::Return(_)))); + } + + #[test] + fn lower_if_stmt() { + let src = "DCL-PROC p EXPORT;\n IF x=1;\n RETURN;\n ENDIF;\nEND-PROC;"; + let p = lower_ok(src); + let proc = &p.procedures[0]; + assert!(matches!(proc.body[0], Statement::If(_))); + } + + #[test] + fn lower_dow_stmt() { + let src = "DCL-PROC p EXPORT;\n DOW x>0;\n LEAVE;\n ENDDO;\nEND-PROC;"; + let p = lower_ok(src); + let proc = &p.procedures[0]; + assert!(matches!(proc.body[0], Statement::DoWhile(_))); + } + + #[test] + fn lower_assign_stmt() { + let src = "DCL-PROC p EXPORT;\n x=1;\n RETURN;\nEND-PROC;"; + let p = lower_ok(src); + assert!(matches!(p.procedures[0].body[0], Statement::Assign(_))); + } +} diff --git a/src/main.rs b/src/main.rs index 9fe8c7c..eb94f6c 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,7 +1,11 @@ //! rust-langrpg — RPG IV compiler CLI //! -//! Parses one or more RPG IV source files using the embedded BNF grammar -//! and optionally writes the resulting parse tree to an output file. +//! Full compilation pipeline: +//! source (.rpg) +//! → BNF validation (bnf crate) +//! → AST lowering (lower.rs) +//! → LLVM IR / object (codegen.rs via inkwell) +//! → native executable (cc linker + librpgrt.so runtime) //! //! ## Usage //! @@ -9,46 +13,80 @@ //! rust-langrpg [OPTIONS] ... //! //! Arguments: -//! ... RPG IV source file(s) to parse +//! ... RPG IV source file(s) to compile //! //! Options: -//! -o Write the parse tree to this file -//! -h, --help Print help -//! -V, --version Print version +//! -o Output executable path [default: a.out] +//! --emit-ir Print LLVM IR to stdout instead of producing a binary +//! --emit-tree Print BNF parse tree to stdout instead of compiling +//! -O Optimisation level 0-3 [default: 0] +//! --no-link Produce a .o object file, skip linking +//! --runtime Path to librpgrt.so [default: auto-detect] +//! -h, --help Print help +//! -V, --version Print version //! ``` //! //! ## Example //! //! ```text -//! 
cargo run --release -- -o out.txt hello.rpg +//! cargo run --release -- -o main hello.rpg +//! ./main +//! DSPLY Hello, World! //! ``` use std::{ fs, - io::{self, Write}, + path::PathBuf, process, }; -use clap::Parser; -use rust_langrpg::{load_grammar, parse_as}; +use clap::Parser as ClapParser; +use rust_langrpg::{codegen, load_grammar, lower::lower, parse_as}; // ───────────────────────────────────────────────────────────────────────────── // CLI definition // ───────────────────────────────────────────────────────────────────────────── -/// RPG IV free-format compiler — parses source files and emits parse trees. -#[derive(Parser, Debug)] -#[command(name = "rust-langrpg", version, about, long_about = None)] +/// RPG IV free-format compiler — produces native Linux executables from RPG IV +/// source files using LLVM as the back-end. +#[derive(ClapParser, Debug)] +#[command( + name = "rust-langrpg", + version, + about = "RPG IV compiler (LLVM back-end)", + long_about = None, +)] struct Cli { - /// RPG IV source file(s) to parse. + /// RPG IV source file(s) to compile. #[arg(required = true, value_name = "SOURCES")] sources: Vec, - /// Write the parse tree(s) to this file. - /// If omitted the tree is not printed. + /// Write the output executable (or object with --no-link) to this path. + /// If omitted the binary is written to `a.out`. #[arg(short = 'o', value_name = "OUTPUT")] output: Option, + + /// Emit LLVM IR text to stdout instead of compiling to a binary. + #[arg(long = "emit-ir")] + emit_ir: bool, + + /// Emit the BNF parse tree to stdout instead of compiling. + #[arg(long = "emit-tree")] + emit_tree: bool, + + /// Optimisation level: 0 = none, 1 = less, 2 = default, 3 = aggressive. + #[arg(short = 'O', default_value = "0", value_name = "LEVEL")] + opt_level: u8, + + /// Produce a `.o` object file but do not invoke the linker. + #[arg(long = "no-link")] + no_link: bool, + + /// Path to the `librpgrt.so` runtime shared library. 
+ /// If not specified the compiler searches in common locations. + #[arg(long = "runtime", value_name = "PATH")] + runtime: Option, } // ───────────────────────────────────────────────────────────────────────────── @@ -58,7 +96,7 @@ struct Cli { fn main() { let cli = Cli::parse(); - // ── Load grammar ───────────────────────────────────────────────────────── + // ── Load and build the BNF grammar ─────────────────────────────────────── let grammar = match load_grammar() { Ok(g) => g, Err(e) => { @@ -67,61 +105,139 @@ fn main() { } }; - // ── Build parser ───────────────────────────────────────────────────────── - let parser = match grammar.build_parser() { + let bnf_parser = match grammar.build_parser() { Ok(p) => p, Err(e) => { - eprintln!("error: failed to build parser: {e}"); + eprintln!("error: failed to build BNF parser: {e}"); process::exit(1); } }; - // ── Open output sink ────────────────────────────────────────────────────── - // `output` is Box so we can use either a file or a sink that - // discards everything when -o was not supplied. 
- let mut output: Box = match &cli.output { - Some(path) => { - let file = fs::File::create(path).unwrap_or_else(|e| { - eprintln!("error: cannot open output file '{}': {e}", path.display()); - process::exit(1); - }); - Box::new(io::BufWriter::new(file)) - } - None => Box::new(io::sink()), - }; - - // ── Process each source file ────────────────────────────────────────────── + // ── Process each source file ───────────────────────────────────────────── let mut any_error = false; - for path in &cli.sources { - let source = match fs::read_to_string(path) { + for source_path in &cli.sources { + let source_text = match fs::read_to_string(source_path) { Ok(s) => s, Err(e) => { - eprintln!("error: cannot read '{}': {e}", path.display()); + eprintln!("error: cannot read '{}': {e}", source_path.display()); any_error = true; continue; } }; - // Try the top-level "program" rule first; fall back to "source-file" - // so the binary is useful even if only one of those rule names exists - // in the grammar. 
- let tree = parse_as(&parser, source.trim(), "program") - .or_else(|| parse_as(&parser, source.trim(), "source-file")); + // ── BNF validation ──────────────────────────────────────────────────── + let tree_opt = parse_as(&bnf_parser, source_text.trim(), "program") + .or_else(|| parse_as(&bnf_parser, source_text.trim(), "source-file")); - match tree { - Some(t) => { - eprintln!("ok: {}", path.display()); - writeln!(output, "=== {} ===", path.display()) - .and_then(|_| writeln!(output, "{t}")) - .unwrap_or_else(|e| { - eprintln!("error: write failed: {e}"); - any_error = true; - }); - } - None => { - eprintln!("error: '{}' did not match the RPG IV grammar", path.display()); + if tree_opt.is_none() { + eprintln!( + "error: '{}' did not match the RPG IV grammar", + source_path.display() + ); + any_error = true; + continue; + } + + // ── --emit-tree: print parse tree and stop ──────────────────────────── + if cli.emit_tree { + println!("=== {} ===", source_path.display()); + println!("{}", tree_opt.unwrap()); + eprintln!("ok: {} (parse tree emitted)", source_path.display()); + continue; + } + + eprintln!("ok: {} (BNF valid)", source_path.display()); + + // ── Lower to typed AST ──────────────────────────────────────────────── + let program = match lower(source_text.trim()) { + Ok(p) => p, + Err(e) => { + eprintln!("error: lowering '{}' failed: {e}", source_path.display()); any_error = true; + continue; + } + }; + + eprintln!( + "ok: {} ({} declaration(s), {} procedure(s))", + source_path.display(), + program.declarations.len(), + program.procedures.len(), + ); + + // ── --emit-ir: print LLVM IR and stop ──────────────────────────────── + if cli.emit_ir { + match codegen::emit_ir(&program) { + Ok(ir) => { + print!("{}", ir); + } + Err(e) => { + eprintln!("error: IR emission failed for '{}': {e}", source_path.display()); + any_error = true; + } + } + continue; + } + + // ── Determine output path ───────────────────────────────────────────── + let out_path = if 
cli.no_link { + // Object file: replace source extension with .o + let mut p = cli.output.clone().unwrap_or_else(|| { + let mut base = source_path.clone(); + base.set_extension("o"); + base + }); + if p.extension().and_then(|e| e.to_str()) != Some("o") { + p.set_extension("o"); + } + p + } else { + // Executable: use -o, or default to a.out + cli.output.clone().unwrap_or_else(|| PathBuf::from("a.out")) + }; + + // ── Compile to object file ──────────────────────────────────────────── + let obj_path: PathBuf = if cli.no_link { + out_path.clone() + } else { + // Temporary object file alongside the final binary. + let stem = source_path + .file_stem() + .and_then(|s| s.to_str()) + .unwrap_or("rpg_prog"); + let mut tmp = std::env::temp_dir(); + tmp.push(format!("{}.rpg.o", stem)); + tmp + }; + + match codegen::compile_to_object(&program, &obj_path, cli.opt_level) { + Ok(()) => { + eprintln!("ok: object → {}", obj_path.display()); + } + Err(e) => { + eprintln!( + "error: codegen failed for '{}': {e}", + source_path.display() + ); + any_error = true; + continue; + } + } + + // ── Link if requested ───────────────────────────────────────────────── + if !cli.no_link { + let runtime = find_runtime(cli.runtime.as_deref()); + match link_executable(&obj_path, &out_path, runtime.as_deref()) { + Ok(()) => { + eprintln!("ok: executable → {}", out_path.display()); + // Clean up the temporary object. + let _ = fs::remove_file(&obj_path); + } + Err(msg) => { + eprintln!("error: linking failed: {msg}"); + any_error = true; + } } } } @@ -130,3 +246,206 @@ fn main() { process::exit(1); } } + +// ───────────────────────────────────────────────────────────────────────────── +// Linker invocation +// ───────────────────────────────────────────────────────────────────────────── + +/// Invoke the system C compiler to link `obj_path` into `exe_path`. 
+/// +/// We use `cc` (which wraps the system linker) rather than calling `ld` +/// directly so that the C runtime startup files (`crt0.o`, `crti.o`, etc.) are +/// included automatically — this is the same approach Clang uses when building +/// executables. +fn link_executable( + obj_path: &std::path::Path, + exe_path: &std::path::Path, + runtime: Option<&std::path::Path>, +) -> Result<(), String> { + let mut cmd = process::Command::new("cc"); + + cmd.arg(obj_path) + .arg("-o") + .arg(exe_path); + + // Link against the RPG runtime shared library. + match runtime { + Some(rt) => { + // Explicit path: use -L -lrpgrt (or pass the .so directly). + if rt.is_file() { + // Absolute path to the .so — pass directly. + cmd.arg(rt); + } else if rt.is_dir() { + cmd.arg(format!("-L{}", rt.display())) + .arg("-lrpgrt"); + } else { + cmd.arg(format!("-L{}", rt.display())) + .arg("-lrpgrt"); + } + } + None => { + // No explicit runtime specified — link against libc only. + // The program will need librpgrt.so to be in LD_LIBRARY_PATH at + // runtime, or the user must build and install it separately. + cmd.arg("-lc"); + } + } + + // Allow the runtime library to be found at execution time relative to the + // executable (rpath tricks). + if let Some(rt) = runtime { + if let Some(dir) = rt.parent() { + let rpath = format!("-Wl,-rpath,{}", dir.display()); + cmd.arg(rpath); + } + } + + let status = cmd + .status() + .map_err(|e| format!("could not run linker `cc`: {e}"))?; + + if status.success() { + Ok(()) + } else { + Err(format!("`cc` exited with status {}", status)) + } +} + +// ───────────────────────────────────────────────────────────────────────────── +// Runtime library discovery +// ───────────────────────────────────────────────────────────────────────────── + +/// Search for `librpgrt.so` in well-known locations. +/// +/// Checked in order: +/// 1. `RPGRT_LIB` environment variable +/// 2. Same directory as the compiler executable +/// 3. 
`target/debug/` or `target/release/` relative to the current directory +/// (useful when running via `cargo run`) +/// 4. `/usr/local/lib` +/// 5. `/usr/lib` +fn find_runtime(explicit: Option<&std::path::Path>) -> Option { + // Honour an explicitly supplied path first. + if let Some(p) = explicit { + return Some(p.to_path_buf()); + } + + // Check the environment variable. + if let Ok(val) = std::env::var("RPGRT_LIB") { + let p = PathBuf::from(val); + if p.exists() { + return Some(p); + } + } + + // Probe standard locations. + let candidates = [ + // Alongside the running binary. + std::env::current_exe() + .ok() + .and_then(|e| e.parent().map(|d| d.join("librpgrt.so"))), + // Cargo target directories. + Some(PathBuf::from("target/debug/librpgrt.so")), + Some(PathBuf::from("target/release/librpgrt.so")), + Some(PathBuf::from("target/debug/deps/librpgrt.so")), + // System-wide. + Some(PathBuf::from("/usr/local/lib/librpgrt.so")), + Some(PathBuf::from("/usr/lib/librpgrt.so")), + ]; + + for candidate in candidates.into_iter().flatten() { + if candidate.exists() { + return Some(candidate); + } + } + + None +} + +// ───────────────────────────────────────────────────────────────────────────── +// Integration smoke test (compile-time only — no process spawning needed) +// ───────────────────────────────────────────────────────────────────────────── + +#[cfg(test)] +mod tests { + use rust_langrpg::{codegen::emit_ir, lower::lower}; + + /// The hello.rpg from the repository root must compile all the way through + /// to LLVM IR without errors. + #[test] + fn hello_rpg_emits_ir() { + let src = include_str!("../hello.rpg"); + let prog = lower(src.trim()).expect("lower hello.rpg"); + let ir = emit_ir(&prog).expect("emit_ir hello.rpg"); + + // The IR must define at least one function. + assert!( + ir.contains("define"), + "IR should contain at least one function definition:\n{}", + &ir[..ir.len().min(1000)] + ); + + // The IR must reference the dsply runtime call. 
+ assert!( + ir.contains("rpg_dsply"), + "IR should reference rpg_dsply:\n{}", + &ir[..ir.len().min(1000)] + ); + + // There must be a C main() wrapper so the binary is directly executable. + assert!( + ir.contains("@main"), + "IR should contain a @main entry point:\n{}", + &ir[..ir.len().min(1000)] + ); + } + + /// A minimal RPG IV program with an integer variable and a loop must + /// compile to IR that contains branch instructions (i.e. the loop was + /// actually code-generated, not silently dropped). + #[test] + fn loop_program_emits_branches() { + let src = r#" +CTL-OPT DFTACTGRP(*NO); + +DCL-S counter INT(10) INZ(0); + +DCL-PROC main EXPORT; + DCL-S i INT(10); + FOR i = 1 TO 10; + counter = counter + i; + ENDFOR; + RETURN; +END-PROC; +"#; + let prog = lower(src.trim()).expect("lower loop program"); + let ir = emit_ir(&prog).expect("emit_ir loop program"); + assert!( + ir.contains("br "), + "loop IR should contain branch instructions:\n{}", + &ir[..ir.len().min(2000)] + ); + } + + /// An IF/ELSE conditional must produce a conditional branch in the IR. + #[test] + fn conditional_program_emits_conditional_branch() { + let src = r#" +DCL-PROC check EXPORT; + DCL-S x INT(10) INZ(5); + IF x = 5; + RETURN; + ELSE; + RETURN; + ENDIF; +END-PROC; +"#; + let prog = lower(src.trim()).expect("lower conditional program"); + let ir = emit_ir(&prog).expect("emit_ir conditional program"); + assert!( + ir.contains("br i1"), + "conditional IR should contain 'br i1':\n{}", + &ir[..ir.len().min(2000)] + ); + } +} diff --git a/tests/hello_rpg.rs b/tests/hello_rpg.rs index 4f31361..ebb872a 100644 --- a/tests/hello_rpg.rs +++ b/tests/hello_rpg.rs @@ -1,4 +1,7 @@ //! Integration tests for the compiler binary against the Hello World program. +//! +//! These tests exercise the full compilation pipeline: +//! 
hello.rpg → BNF validation → AST lowering → LLVM codegen → native binary use std::process::Command; @@ -25,11 +28,12 @@ fn run(args: &[&str]) -> std::process::Output { // Tests // ───────────────────────────────────────────────────────────────────────────── -/// The compiler should exit 0 when given hello.rpg (no -o flag — tree is -/// discarded but the parse must still succeed). +/// The compiler should exit 0 when given hello.rpg (no -o flag — the output +/// executable is written to a.out but the important thing is no error). #[test] fn hello_rpg_exits_ok() { - let out = run(&[HELLO_RPG]); + let out_path = std::env::temp_dir().join("hello_rpg_exits_ok.out"); + let out = run(&["-o", out_path.to_str().unwrap(), HELLO_RPG]); assert!( out.status.success(), "expected exit 0 for hello.rpg\nstderr: {}", @@ -37,11 +41,11 @@ fn hello_rpg_exits_ok() { ); } -/// When -o is supplied the output file must be created and contain a non-empty -/// parse tree. +/// When -o is supplied the output file must be created as a non-empty compiled +/// artifact (executable binary). #[test] -fn hello_rpg_writes_tree() { - let out_path = std::env::temp_dir().join("hello_rpg_test_tree.txt"); +fn hello_rpg_produces_output_file() { + let out_path = std::env::temp_dir().join("hello_rpg_test_output.out"); let out = run(&["-o", out_path.to_str().unwrap(), HELLO_RPG]); @@ -51,30 +55,151 @@ fn hello_rpg_writes_tree() { String::from_utf8_lossy(&out.stderr), ); - let tree = std::fs::read_to_string(&out_path) - .unwrap_or_else(|e| panic!("could not read output file '{}': {e}", out_path.display())); - assert!( - !tree.trim().is_empty(), - "output file is empty — expected a parse tree", + out_path.exists(), + "output file '{}' was not created", + out_path.display(), ); - // The tree should reference at least the top-level non-terminal. 
+ let metadata = std::fs::metadata(&out_path) + .unwrap_or_else(|e| panic!("could not stat output file: {e}")); + assert!( - tree.contains("program"), - "parse tree does not mention :\n{tree}", + metadata.len() > 0, + "output file is empty — expected a compiled artifact", ); } -/// The compiler must print the file name to stderr as "ok: hello.rpg" (or the -/// full path) when the parse succeeds. +/// The compiler must print the file name to stderr with an "ok:" prefix when +/// BNF validation succeeds. #[test] fn hello_rpg_reports_ok_on_stderr() { - let out = run(&[HELLO_RPG]); + let out_path = std::env::temp_dir().join("hello_rpg_reports_ok.out"); + let out = run(&["-o", out_path.to_str().unwrap(), HELLO_RPG]); let stderr = String::from_utf8_lossy(&out.stderr); assert!( - stderr.starts_with("ok:"), - "expected stderr to start with 'ok:'\ngot: {stderr}", + stderr.contains("ok:"), + "expected stderr to contain 'ok:'\ngot: {stderr}", + ); +} + +/// `--emit-ir` must print LLVM IR to stdout and exit 0. +/// +/// The IR must contain: +/// * At least one `define` (a function definition) +/// * A reference to `rpg_dsply` (the DSPLY runtime call) +/// * A `@main` entry point (the C main wrapper) +#[test] +fn hello_rpg_emit_ir() { + let out = run(&["--emit-ir", HELLO_RPG]); + + assert!( + out.status.success(), + "expected exit 0 with --emit-ir\nstderr: {}", + String::from_utf8_lossy(&out.stderr), + ); + + let ir = String::from_utf8_lossy(&out.stdout); + + assert!( + ir.contains("define"), + "--emit-ir should produce at least one LLVM function definition\nIR:\n{}", + &ir[..ir.len().min(2000)], + ); + + assert!( + ir.contains("rpg_dsply"), + "--emit-ir should reference the rpg_dsply runtime symbol\nIR:\n{}", + &ir[..ir.len().min(2000)], + ); + + assert!( + ir.contains("@main"), + "--emit-ir should contain a @main entry-point wrapper\nIR:\n{}", + &ir[..ir.len().min(2000)], + ); +} + +/// `--emit-tree` must print the BNF parse tree to stdout and exit 0. 
+/// +/// The tree must mention `program` (the top-level grammar rule). +#[test] +fn hello_rpg_emit_tree() { + let out = run(&["--emit-tree", HELLO_RPG]); + + assert!( + out.status.success(), + "expected exit 0 with --emit-tree\nstderr: {}", + String::from_utf8_lossy(&out.stderr), + ); + + let tree = String::from_utf8_lossy(&out.stdout); + + assert!( + !tree.trim().is_empty(), + "--emit-tree output is empty — expected a parse tree", + ); + + assert!( + tree.contains("program"), + "--emit-tree output should reference the rule\n{}", + &tree[..tree.len().min(1000)], + ); +} + +/// `--no-link` should produce a `.o` object file and exit 0. +#[test] +fn hello_rpg_no_link_produces_object() { + let obj_path = std::env::temp_dir().join("hello_rpg_test.o"); + + let out = run(&["--no-link", "-o", obj_path.to_str().unwrap(), HELLO_RPG]); + + assert!( + out.status.success(), + "expected exit 0 with --no-link\nstderr: {}", + String::from_utf8_lossy(&out.stderr), + ); + + assert!( + obj_path.exists(), + "object file '{}' was not created", + obj_path.display(), + ); + + let metadata = std::fs::metadata(&obj_path) + .unwrap_or_else(|e| panic!("could not stat object file: {e}")); + + assert!( + metadata.len() > 0, + "object file is empty — expected compiled LLVM output", + ); + + // A valid ELF object file starts with the ELF magic bytes 0x7f 'E' 'L' 'F'. + let bytes = std::fs::read(&obj_path) + .unwrap_or_else(|e| panic!("could not read object file: {e}")); + + assert!( + bytes.starts_with(b"\x7fELF"), + "expected an ELF object file, got unexpected magic bytes: {:?}", + &bytes[..bytes.len().min(4)], + ); +} + +/// Passing a non-existent file should cause the compiler to exit non-zero and +/// print an error to stderr. 
+#[test] +fn nonexistent_source_exits_error() { + let out = run(&["no_such_file_xyz.rpg"]); + + assert!( + !out.status.success(), + "expected non-zero exit for a missing source file", + ); + + let stderr = String::from_utf8_lossy(&out.stderr); + assert!( + stderr.contains("error"), + "expected an error message on stderr\ngot: {stderr}", ); }