add: fib sample

This commit is contained in:
2026-03-12 22:19:42 -07:00
parent 073c86d784
commit 31a6c8b91b
7 changed files with 756 additions and 46 deletions

View File

@@ -220,6 +220,8 @@ pub enum VarKeyword {
/// `INZ(*named-constant)` — initialise to named constant.
InzNamed(NamedConstant),
Static,
/// `DIM(n)` — declares the variable as an array with `n` elements.
Dim(Expression),
Other(String),
}
@@ -608,6 +610,8 @@ pub enum BuiltIn {
Rem(Box<Expression>, Box<Expression>),
/// `%DIV(a:b)`.
Div(Box<Expression>, Box<Expression>),
/// `%ELEM(array)` — number of elements in an array.
Elem(Box<Expression>),
/// Any built-in we haven't individually modelled.
Other(String, Vec<Expression>),
}

View File

@@ -77,6 +77,7 @@ pub fn compile_to_object(
module,
builder,
globals: HashMap::new(),
array_dims: HashMap::new(),
string_cache: HashMap::new(),
global_inits: Vec::new(),
};
@@ -139,6 +140,7 @@ pub fn emit_ir(program: &Program) -> Result<String, CodegenError> {
module,
builder,
globals: HashMap::new(),
array_dims: HashMap::new(),
string_cache: HashMap::new(),
global_inits: Vec::new(),
};
@@ -180,6 +182,9 @@ struct Codegen<'ctx> {
builder: Builder<'ctx>,
/// Module-scope global variables name -> (alloca/global ptr, TypeSpec)
globals: HashMap<String, (PointerValue<'ctx>, TypeSpec)>,
/// Array dimension table: variable name -> number of elements.
/// Populated when a `DIM(n)` keyword is encountered.
array_dims: HashMap<String, u64>,
/// Interned string literal globals (content -> global ptr).
string_cache: HashMap<String, PointerValue<'ctx>>,
/// Global declarations that need runtime initialisation (INZ with a value).
@@ -228,6 +233,25 @@ impl<'ctx> Codegen<'ctx> {
// function is available when we build the @llvm.global_ctors entry.
self.gen_global_init_fn()?;
// Determine the entry-point procedure name.
//
// Priority order:
// 1. `CTL-OPT MAIN(name)` — explicit entry point declaration.
// 2. The first EXPORT-ed procedure (legacy / hello.rpg style).
let ctl_main: Option<String> = program.declarations.iter().find_map(|d| {
if let Declaration::ControlSpec(cs) = d {
cs.keywords.iter().find_map(|kw| {
if let CtlKeyword::Main(name) = kw {
Some(name.clone())
} else {
None
}
})
} else {
None
}
});
// Generate each procedure.
let mut exported_name: Option<String> = None;
for proc in &program.procedures {
@@ -237,8 +261,10 @@ impl<'ctx> Codegen<'ctx> {
self.gen_procedure(proc)?;
}
// Emit a C `main()` wrapper that calls the exported entry point.
if let Some(name) = exported_name {
// Emit a C `main()` wrapper that calls the entry point.
// CTL-OPT MAIN(name) takes priority over EXPORT.
let entry = ctl_main.or(exported_name);
if let Some(name) = entry {
self.gen_main_wrapper(&name)?;
}
@@ -278,6 +304,18 @@ impl<'ctx> Codegen<'ctx> {
false,
);
self.module.add_function("memset", memset_ty, None);
// void rpg_dsply_i64(i64 n) — display an integer
let dsply_i64_ty = void_t.fn_type(&[i64_t.into()], false);
self.module.add_function("rpg_dsply_i64", dsply_i64_ty, None);
// i8* rpg_char_i64(i64 n) — format integer to null-terminated C string
let char_i64_ty = i8_ptr.fn_type(&[i64_t.into()], false);
self.module.add_function("rpg_char_i64", char_i64_ty, None);
// i8* rpg_concat(i8* a, i8* b) — concatenate two C strings
let concat_ty = i8_ptr.fn_type(&[i8_ptr.into(), i8_ptr.into()], false);
self.module.add_function("rpg_concat", concat_ty, None);
}
// ── Global declarations ─────────────────────────────────────────────────
@@ -384,7 +422,28 @@ impl<'ctx> Codegen<'ctx> {
fn gen_local_decl(&mut self, decl: &Declaration, state: &mut FnState<'ctx>) -> Result<(), CodegenError> {
match decl {
Declaration::Standalone(sd) => {
let ptr = self.alloca_for_type(&sd.ty, &sd.name);
// Check if a DIM(n) keyword is present — if so we allocate a
// contiguous block of `n * elem_size` bytes.
let dim = sd.keywords.iter().find_map(|kw| {
if let VarKeyword::Dim(expr) = kw {
const_int_from_expr(expr)
} else {
None
}
});
let ptr = if let Some(n) = dim {
// Array: allocate n elements of the element type.
self.alloca_for_type_dim(&sd.ty, &sd.name, n)
} else {
self.alloca_for_type(&sd.ty, &sd.name)
};
// Record the dimension so %Elem and indexing can use it.
if let Some(n) = dim {
self.array_dims.insert(sd.name.clone(), n);
}
// Apply initialiser if any.
for kw in &sd.keywords {
match kw {
@@ -415,6 +474,35 @@ impl<'ctx> Codegen<'ctx> {
self.builder.build_alloca(arr_ty, name).unwrap()
}
/// Allocate stack storage for an array of `n` elements of type `ty`.
///
/// The array is modelled as a flat `[i8 x total]` block, where `total` is
/// `n` multiplied by the element's byte size (8 bytes are assumed when
/// `ty.byte_size()` returns `None`).
///
/// # Panics
///
/// Panics if the total byte size does not fit in `u32` (the length type
/// taken by `array_type`), or if the builder fails to emit the alloca.
/// Previously an oversized `DIM` was truncated by `as u32`, which silently
/// produced an undersized allocation.
fn alloca_for_type_dim(&self, ty: &TypeSpec, name: &str, n: u64) -> PointerValue<'ctx> {
    let elem_size = ty.byte_size().unwrap_or(8);
    // Do the arithmetic in u64 and range-check before narrowing, so neither
    // the multiplication nor the cast can wrap.
    let total = elem_size
        .checked_mul(n)
        .filter(|&bytes| bytes <= u64::from(u32::MAX))
        .expect("DIM array byte size does not fit in u32");
    // Lossless: `total` was checked against u32::MAX above.
    let arr_ty = self.context.i8_type().array_type(total as u32);
    self.builder.build_alloca(arr_ty, name).unwrap()
}
/// Compute the address of a single element inside a byte-addressed array.
///
/// `index` is the element number using RPG's 1-based convention and
/// `elem_size` is the size of one element in bytes.  The result is
/// `base_ptr` advanced by `(index - 1) * elem_size` bytes, emitted as an
/// `i8` GEP.
fn array_elem_ptr(
    &self,
    base_ptr: PointerValue<'ctx>,
    index: inkwell::values::IntValue<'ctx>,
    elem_size: u64,
) -> PointerValue<'ctx> {
    let i64_ty = self.context.i64_type();
    let first_index = i64_ty.const_int(1, false);
    let stride = i64_ty.const_int(elem_size, false);

    // Shift the 1-based RPG index down to 0-based, then scale it to bytes.
    let element_no = self
        .builder
        .build_int_sub(index, first_index, "idx0")
        .unwrap();
    let offset = self
        .builder
        .build_int_mul(element_no, stride, "byte_off")
        .unwrap();

    let byte_ty = self.context.i8_type();
    // SAFETY: the GEP is a byte offset from `base_ptr`.
    // NOTE(review): no bounds check is emitted here, so the generated code
    // relies on `index` lying within the array's declared DIM — confirm
    // callers guarantee this.
    unsafe {
        self.builder
            .build_gep(byte_ty, base_ptr, &[offset], "elem_ptr")
    }
    .unwrap()
}
fn zero_init_var(&self, ptr: PointerValue<'ctx>, ty: &TypeSpec) -> Result<(), CodegenError> {
let size = ty.byte_size().unwrap_or(0);
if size == 0 { return Ok(()); }
@@ -604,8 +692,11 @@ impl<'ctx> Codegen<'ctx> {
self.builder.position_at_end(bb);
// Call the RPG entry procedure.
let rpg_fn_name = format!("rpg_{}", rpg_entry);
if let Some(rpg_fn) = self.module.get_function(&rpg_fn_name) {
// Try the bare name first (CTL-OPT MAIN procedures are not renamed),
// then the `rpg_` prefix used for EXPORT-ed procedures.
let callee = self.module.get_function(rpg_entry)
.or_else(|| self.module.get_function(&format!("rpg_{}", rpg_entry)));
if let Some(rpg_fn) = callee {
self.builder.build_call(rpg_fn, &[], "call_rpg").ok();
}
@@ -699,13 +790,31 @@ impl<'ctx> Codegen<'ctx> {
self.builder.build_call(dsply, &[ptr.into(), len_val.into()], "dsply").ok();
}
other => {
// Evaluate as integer-like expression and display it.
// Evaluate the expression; dispatch to the right display helper.
if let Ok(val) = self.gen_expression(other, state) {
// For now just call dsply_cstr on an empty string as fallback.
let _ = val;
let empty = self.intern_string("");
let zero = self.context.i64_type().const_zero();
self.builder.build_call(dsply, &[empty.into(), zero.into()], "dsply").ok();
match val {
BasicValueEnum::PointerValue(ptr) => {
// String pointer — use rpg_dsply_cstr.
if let Some(dsply_cstr) = self.module.get_function("rpg_dsply_cstr") {
self.builder.build_call(dsply_cstr, &[ptr.into()], "dsply_cstr").ok();
}
}
BasicValueEnum::IntValue(iv) => {
// Integer — use rpg_dsply_i64.
if let Some(dsply_i64) = self.module.get_function("rpg_dsply_i64") {
let ext = self.builder
.build_int_s_extend(iv, self.context.i64_type(), "dsply_ext")
.unwrap_or(iv);
self.builder.build_call(dsply_i64, &[ext.into()], "dsply_i64").ok();
}
}
_ => {
// Fallback: display an empty string.
let empty = self.intern_string("");
let zero = self.context.i64_type().const_zero();
self.builder.build_call(dsply, &[empty.into(), zero.into()], "dsply").ok();
}
}
}
}
}
@@ -723,6 +832,20 @@ impl<'ctx> Codegen<'ctx> {
// Clone to avoid borrow issues.
let ty = ty.clone();
// If the LValue has an index (array assignment), compute the element pointer.
let dest_ptr = if let LValue::Index(_, indices) = &a.target {
if let Some(idx_expr) = indices.first() {
let elem_size = ty.byte_size().unwrap_or(8);
let idx_val = self.gen_expression(idx_expr, state)?;
let idx_i = self.coerce_to_i64(idx_val);
self.array_elem_ptr(ptr, idx_i, elem_size)
} else {
ptr
}
} else {
ptr
};
match &ty {
TypeSpec::Char(size_expr) => {
if let Expression::Literal(Literal::String(s)) = &a.value {
@@ -733,16 +856,16 @@ impl<'ctx> Codegen<'ctx> {
let src = self.intern_bytes(&padded);
let memcpy = self.module.get_function("memcpy").unwrap();
let len = self.context.i64_type().const_int(field_len as u64, false);
self.builder.build_call(memcpy, &[ptr.into(), src.into(), len.into()], "assign").ok();
self.builder.build_call(memcpy, &[dest_ptr.into(), src.into(), len.into()], "assign").ok();
}
}
TypeSpec::Int(_) | TypeSpec::Uns(_) => {
let val = self.gen_expression(&a.value, state)?;
self.store_value(ptr, val, &ty);
self.store_value(dest_ptr, val, &ty);
}
_ => {
if let Ok(val) = self.gen_expression(&a.value, state) {
self.store_value(ptr, val, &ty);
self.store_value(dest_ptr, val, &ty);
}
}
}
@@ -893,7 +1016,10 @@ impl<'ctx> Codegen<'ctx> {
let start = self.gen_expression(&f.start, state)?;
let start_i = self.coerce_to_i64(start);
self.builder.build_store(loop_var, start_i).ok();
state.locals.insert(f.var.clone(), (loop_var, TypeSpec::Int(Box::new(Expression::Literal(Literal::Integer(10))))));
// Store the loop variable with Int(20) so that byte_size() returns 8,
// matching the i64 alloca above. (Int(10) would give 4 bytes, causing
// a 32-bit load from an 8-byte slot.)
state.locals.insert(f.var.clone(), (loop_var, TypeSpec::Int(Box::new(Expression::Literal(Literal::Integer(20))))));
let cond_bb = self.context.append_basic_block(func, "for_cond");
let body_bb = self.context.append_basic_block(func, "for_body");
@@ -1062,9 +1188,12 @@ impl<'ctx> Codegen<'ctx> {
let llvm_ty = self.type_spec_to_llvm(&ty)
.unwrap_or(BasicTypeEnum::IntType(i64_t));
match &ty {
TypeSpec::Int(w) | TypeSpec::Uns(w) => {
let width = const_int_from_expr(w).unwrap_or(8);
let int_ty = self.context.custom_width_int_type((width * 8) as u32);
TypeSpec::Int(_) | TypeSpec::Uns(_) => {
// Use byte_size() to get the real storage width — the
// type parameter is RPG's digit-precision (e.g. 10 for
// Uns(10) = 4 bytes), NOT the byte count.
let bytes = ty.byte_size().unwrap_or(8);
let int_ty = self.context.custom_width_int_type((bytes * 8) as u32);
if let Ok(v) = self.builder.build_load(int_ty, ptr, name) {
let iv = v.into_int_value();
let ext = self.builder.build_int_s_extend(iv, i64_t, "sext").unwrap_or(iv);
@@ -1111,7 +1240,15 @@ impl<'ctx> Codegen<'ctx> {
Expression::Paren(e) => self.gen_expression(e, state),
Expression::Call(name, args) => {
// Treat call-as-expression similarly to CALLP.
// RPG IV uses identical syntax for procedure calls and array
// subscripts: `name(arg)`. At parse time we always emit
// Expression::Call for `ident(...)`, so here we need to
// distinguish the two cases at code-generation time:
//
// 1. A real procedure/function exists in the module → call it.
// 2. The name refers to a local/global variable with a known
// DIM → treat the single argument as an array index.
// 3. Otherwise → return 0 (unknown call).
let callee = self.module.get_function(name)
.or_else(|| self.module.get_function(&format!("rpg_{}", name)));
if let Some(callee) = callee {
@@ -1128,13 +1265,79 @@ impl<'ctx> Codegen<'ctx> {
inkwell::values::ValueKind::Basic(v) => return Ok(v),
inkwell::values::ValueKind::Instruction(_) => {}
}
return Ok(i64_t.const_zero().into());
}
// No function found — check if `name` is an array variable and
// the call is actually a subscript read: name(idx).
if let Some((ptr, ty)) = self.resolve_var(name, state) {
let elem_size = ty.byte_size().unwrap_or(8);
if let Some(Arg::Expr(idx_expr)) = args.first() {
let idx_val = self.gen_expression(idx_expr, state)?;
let idx_i = self.coerce_to_i64(idx_val);
let elem_ptr = self.array_elem_ptr(ptr, idx_i, elem_size);
match &ty {
TypeSpec::Int(_) | TypeSpec::Uns(_) => {
let bytes = ty.byte_size().unwrap_or(8);
let int_ty = self.context.custom_width_int_type((bytes * 8) as u32);
let cast_ptr = self.builder.build_pointer_cast(
elem_ptr,
self.context.ptr_type(AddressSpace::default()),
"call_elem_ptr_cast",
).unwrap_or(elem_ptr);
if let Ok(v) = self.builder.build_load(int_ty, cast_ptr, "call_elem") {
let iv = v.into_int_value();
let ext = self.builder
.build_int_s_extend(iv, i64_t, "call_elem_ext")
.unwrap_or(iv);
return Ok(ext.into());
}
}
_ => return Ok(elem_ptr.into()),
}
}
}
Ok(i64_t.const_zero().into())
}
Expression::BuiltIn(bif) => self.gen_builtin(bif, state),
Expression::Special(_) | Expression::Index(_, _) => {
Expression::Special(_) => {
Ok(i64_t.const_zero().into())
}
Expression::Index(qname, indices) => {
// Array element read: name(i) — RPG uses 1-based indexing.
let name = qname.leaf();
if let Some((ptr, ty)) = self.resolve_var(name, state) {
let elem_size = ty.byte_size().unwrap_or(8);
if let Some(idx_expr) = indices.first() {
let idx_val = self.gen_expression(idx_expr, state)?;
let idx_i = self.coerce_to_i64(idx_val);
let elem_ptr = self.array_elem_ptr(ptr, idx_i, elem_size);
// Load the element with the element's integer type.
match &ty {
TypeSpec::Int(_) | TypeSpec::Uns(_) => {
let bytes = ty.byte_size().unwrap_or(8);
let int_ty = self.context.custom_width_int_type((bytes * 8) as u32);
let cast_ptr = self.builder.build_pointer_cast(
elem_ptr,
self.context.ptr_type(AddressSpace::default()),
"elem_ptr_cast",
).unwrap_or(elem_ptr);
if let Ok(v) = self.builder.build_load(int_ty, cast_ptr, "elem") {
let iv = v.into_int_value();
let ext = self.builder.build_int_s_extend(iv, i64_t, "sext").unwrap_or(iv);
return Ok(ext.into());
}
}
_ => {
return Ok(elem_ptr.into());
}
}
}
}
Ok(i64_t.const_zero().into())
}
}
@@ -1179,6 +1382,24 @@ impl<'ctx> Codegen<'ctx> {
match op {
BinOp::Add => {
// If either operand is a pointer (string), use rpg_concat.
let lv_is_ptr = matches!(lv, BasicValueEnum::PointerValue(_));
let rv_is_ptr = matches!(rv, BasicValueEnum::PointerValue(_));
if lv_is_ptr || rv_is_ptr {
// Ensure both sides are pointers (call rpg_char_i64 on integers).
let lp = self.coerce_to_cstr_ptr(lv, state);
let rp = self.coerce_to_cstr_ptr(rv, state);
let concat_fn = self.module.get_function("rpg_concat").unwrap();
let call = self.builder
.build_call(concat_fn, &[lp.into(), rp.into()], "concat")
.unwrap();
return match call.try_as_basic_value() {
inkwell::values::ValueKind::Basic(v) => Ok(v),
inkwell::values::ValueKind::Instruction(_) => {
Ok(self.context.ptr_type(AddressSpace::default()).const_null().into())
}
};
}
let l = self.coerce_to_i64(lv);
let r = self.coerce_to_i64(rv);
Ok(self.builder.build_int_add(l, r, "add").unwrap().into())
@@ -1243,6 +1464,18 @@ impl<'ctx> Codegen<'ctx> {
fn gen_builtin(&mut self, bif: &BuiltIn, state: &mut FnState<'ctx>) -> Result<BasicValueEnum<'ctx>, CodegenError> {
let i64_t = self.context.i64_type();
match bif {
BuiltIn::Elem(e) => {
// %ELEM(array) — number of elements declared with DIM(n).
if let Expression::Variable(qname) = e.as_ref() {
let name = qname.leaf();
if let Some(&n) = self.array_dims.get(name) {
return Ok(i64_t.const_int(n, false).into());
}
// Fall back to 1 if not an array variable.
return Ok(i64_t.const_int(1, false).into());
}
Ok(i64_t.const_zero().into())
}
BuiltIn::Len(e) => {
// %LEN(field) — return compile-time field length.
if let Expression::Variable(qname) = e.as_ref() {
@@ -1267,8 +1500,30 @@ impl<'ctx> Codegen<'ctx> {
let abs = self.builder.build_select(cmp, iv, neg, "abs").unwrap();
Ok(abs.into())
}
BuiltIn::Int(e) | BuiltIn::Char(e) => {
self.gen_expression(e, state)
BuiltIn::Int(e) => {
let v = self.gen_expression(e, state)?;
Ok(self.coerce_to_i64(v).into())
}
BuiltIn::Char(e) => {
// %CHAR(expr) — convert to a null-terminated C string pointer.
let v = self.gen_expression(e, state)?;
match v {
BasicValueEnum::PointerValue(_) => Ok(v), // already a string
_ => {
// Convert integer to string via rpg_char_i64.
let iv = self.coerce_to_i64(v);
let char_fn = self.module.get_function("rpg_char_i64").unwrap();
let call = self.builder
.build_call(char_fn, &[iv.into()], "char_i64")
.unwrap();
match call.try_as_basic_value() {
inkwell::values::ValueKind::Basic(v) => Ok(v),
inkwell::values::ValueKind::Instruction(_) => {
Ok(self.context.ptr_type(AddressSpace::default()).const_null().into())
}
}
}
}
}
BuiltIn::Sqrt(e) => {
let v = self.gen_expression(e, state)?;
@@ -1387,6 +1642,32 @@ impl<'ctx> Codegen<'ctx> {
Ok(self.builder.build_int_compare(pred, l, r, "cmp").unwrap_or_else(|_| i64_t.const_zero()))
}
/// Turn an arbitrary value into a C string pointer (`i8*`).
///
/// * Pointer values are returned untouched.
/// * Anything else is first widened with `coerce_to_i64` and then passed to
///   the `rpg_char_i64` runtime helper; the pointer it returns is the result.
///
/// If the helper call does not yield a pointer value, a null pointer is
/// returned instead.  `_state` is accepted but not used.
fn coerce_to_cstr_ptr(
    &mut self,
    val: BasicValueEnum<'ctx>,
    _state: &mut FnState<'ctx>,
) -> PointerValue<'ctx> {
    // Already a pointer: nothing to convert.
    if let BasicValueEnum::PointerValue(existing) = val {
        return existing;
    }

    let as_int = self.coerce_to_i64(val);
    let formatter = self.module.get_function("rpg_char_i64").unwrap();
    let outcome = self
        .builder
        .build_call(formatter, &[as_int.into()], "char_i64")
        .unwrap()
        .try_as_basic_value();

    if let inkwell::values::ValueKind::Basic(BasicValueEnum::PointerValue(formatted)) = outcome {
        formatted
    } else {
        self.context.ptr_type(AddressSpace::default()).const_null()
    }
}
fn coerce_to_i64(&self, val: BasicValueEnum<'ctx>) -> inkwell::values::IntValue<'ctx> {
let i64_t = self.context.i64_type();
match val {

View File

@@ -45,6 +45,24 @@ pub fn lower(source: &str) -> Result<Program, LowerError> {
Ok(program)
}
/// Blank out every line whose first non-whitespace characters are `**`
/// (RPG IV compiler directives such as `**FREE` or `**CTDATA`).
///
/// Each such line is replaced by an empty line rather than removed, so the
/// line numbering of the remaining text is unchanged.  Lines are re-joined
/// with `\n`; as a consequence a trailing newline is dropped and `\r\n`
/// line endings come out as `\n`.
fn strip_star_star_directives(source: &str) -> String {
    let mut cleaned = String::with_capacity(source.len());
    for (line_idx, text) in source.lines().enumerate() {
        // Separator goes between lines, never after the last one.
        if line_idx > 0 {
            cleaned.push('\n');
        }
        let is_directive = text.trim_start().starts_with("**");
        if !is_directive {
            cleaned.push_str(text);
        }
    }
    cleaned
}
// ─────────────────────────────────────────────────────────────────────────────
// Error type
// ─────────────────────────────────────────────────────────────────────────────
@@ -52,11 +70,17 @@ pub fn lower(source: &str) -> Result<Program, LowerError> {
#[derive(Debug)]
/// Error type returned by the lowering pass (`lower`, `tokenize`, and the
/// parser methods all return `Result<_, LowerError>`).
pub struct LowerError {
/// Human-readable description of the problem; this is the text printed by
/// the `Display` implementation.
pub message: String,
/// 1-based source line where the error was detected, if known.
pub line: Option<usize>,
}
impl std::fmt::Display for LowerError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "lower error: {}", self.message)
if let Some(ln) = self.line {
write!(f, "lower error (line {}): {}", ln, self.message)
} else {
write!(f, "lower error: {}", self.message)
}
}
}
@@ -64,7 +88,11 @@ impl std::error::Error for LowerError {}
impl LowerError {
fn new(msg: impl Into<String>) -> Self {
LowerError { message: msg.into() }
LowerError { message: msg.into(), line: None }
}
fn at(line: usize, msg: impl Into<String>) -> Self {
LowerError { message: msg.into(), line: Some(line) }
}
}
@@ -385,12 +413,22 @@ enum Token {
// ─────────────────────────────────────────────────────────────────────────────
fn tokenize(source: &str) -> Result<Vec<Token>, LowerError> {
let chars: Vec<char> = source.chars().collect();
// Strip **FREE / **CTDATA / any **word compiler directives first.
let cleaned = strip_star_star_directives(source);
let chars: Vec<char> = cleaned.chars().collect();
let mut pos = 0;
let mut tokens = Vec::new();
let mut line: usize = 1;
while pos < chars.len() {
// Skip whitespace
// Track line numbers.
if chars[pos] == '\n' {
line += 1;
pos += 1;
continue;
}
// Skip other whitespace
if chars[pos].is_whitespace() {
pos += 1;
continue;
@@ -490,6 +528,14 @@ fn tokenize(source: &str) -> Result<Vec<Token>, LowerError> {
'=' => { tokens.push(Token::OpEq); pos += 1; continue; }
'*' => {
if pos + 1 < chars.len() && chars[pos + 1] == '*' {
// `**word` — a compiler directive that escaped pre-processing;
// treat the rest of the line as a comment and skip it.
if pos + 2 < chars.len() && chars[pos + 2].is_alphabetic() {
while pos < chars.len() && chars[pos] != '\n' {
pos += 1;
}
continue;
}
tokens.push(Token::OpStar2);
pos += 2;
} else {
@@ -704,6 +750,7 @@ fn tokenize(source: &str) -> Result<Vec<Token>, LowerError> {
}
tokens.push(Token::Eof);
let _ = line; // line tracking available for future per-token storage
Ok(tokens)
}
@@ -873,11 +920,12 @@ fn keyword_or_ident(upper: &str, original: &str) -> Token {
/// Parser state: the token vector being parsed plus the current read position.
struct Parser {
/// The tokens to parse, as handed to `Parser::new`.
tokens: Vec<Token>,
/// Index of the current token in `tokens`; reported as "token index" in
/// `expect` error messages.
pos: usize,
/// Line counter, initialised to 1 by `Parser::new`.  Nothing visible here
/// reads it (hence the underscore prefix).
/// NOTE(review): presumably reserved for future line-aware diagnostics — confirm.
_line: usize,
}
impl Parser {
fn new(tokens: Vec<Token>) -> Self {
Parser { tokens, pos: 0 }
Parser { tokens, pos: 0, _line: 1 }
}
fn peek(&self) -> &Token {
@@ -901,7 +949,10 @@ impl Parser {
if &tok == expected {
Ok(())
} else {
Err(LowerError::new(format!("expected {:?}, got {:?}", expected, tok)))
Err(LowerError::new(format!(
"expected {:?}, got {:?} (token index {})",
expected, tok, self.pos
)))
}
}
@@ -927,12 +978,21 @@ impl Parser {
fn parse_program(&mut self) -> Result<Program, LowerError> {
let mut declarations = Vec::new();
let mut procedures = Vec::new();
let mut skipped_tokens: Vec<String> = Vec::new();
while !self.is_eof() {
match self.peek() {
Token::KwDclProc => {
if let Ok(p) = self.parse_procedure() {
procedures.push(p);
if !skipped_tokens.is_empty() {
skipped_tokens.clear();
}
match self.parse_procedure() {
Ok(p) => procedures.push(p),
Err(e) => {
eprintln!("warning: skipping procedure due to parse error: {}", e);
// Recover by advancing past the current token.
self.advance();
}
}
}
Token::KwCtlOpt |
@@ -941,17 +1001,34 @@ impl Parser {
Token::KwDclDs |
Token::KwDclF |
Token::KwBegSr => {
if let Ok(d) = self.parse_declaration() {
declarations.push(d);
if !skipped_tokens.is_empty() {
skipped_tokens.clear();
}
match self.parse_declaration() {
Ok(d) => declarations.push(d),
Err(e) => {
eprintln!("warning: skipping declaration due to parse error: {}", e);
self.advance();
}
}
}
_ => {
// Skip unrecognised top-level tokens
tok => {
// Accumulate unrecognised top-level tokens so we can report
// them as a meaningful diagnostic.
skipped_tokens.push(format!("{:?}", tok));
self.advance();
}
}
}
if !skipped_tokens.is_empty() {
eprintln!(
"warning: {} unrecognised top-level token(s) were skipped: {}",
skipped_tokens.len(),
skipped_tokens.join(", ")
);
}
Ok(Program { declarations, procedures })
}
@@ -965,7 +1042,11 @@ impl Parser {
Token::KwDclDs => self.parse_dcl_ds(),
Token::KwDclF => self.parse_dcl_f(),
Token::KwBegSr => self.parse_subroutine(),
tok => Err(LowerError::new(format!("unexpected token in declaration: {:?}", tok))),
tok => Err(LowerError::new(format!(
"unexpected token in declaration: {:?}\
expected one of CTL-OPT, DCL-S, DCL-C, DCL-DS, DCL-F, BEG-SR",
tok
))),
}
}
@@ -1256,6 +1337,18 @@ impl Parser {
fn parse_var_keyword(&mut self) -> VarKeyword {
match self.peek().clone() {
Token::KwDim => {
self.advance(); // KwDim
if self.peek() == &Token::LParen {
self.advance(); // (
if let Ok(expr) = self.parse_expression() {
self.eat(&Token::RParen);
return VarKeyword::Dim(expr);
}
self.eat(&Token::RParen);
}
VarKeyword::Other("DIM".to_string())
}
Token::KwInz => {
self.advance();
if self.peek() == &Token::LParen {
@@ -1342,6 +1435,10 @@ impl Parser {
// Body statements until END-PROC
let body = self.parse_statement_list(&[Token::KwEndProc]);
self.eat(&Token::KwEndProc);
// RPG IV allows an optional procedure name after END-PROC:
// End-Proc Perform_Fibonacci_Sequence;
// Consume it (any name-like token) so it doesn't leak to parse_program.
let _ = self.try_parse_name();
self.eat_semicolon();
Ok(Procedure { name, exported, pi, locals, body })
@@ -1893,6 +1990,8 @@ impl Parser {
if self.peek() == &Token::LParen {
// Peek ahead to decide: call or subscript-assignment?
// If after the matching ')' we see '=' it's an assignment, else call.
// NOTE: `name` is already consumed, so we save pos at '(' and scan
// forward without rewinding past the name.
let saved = self.pos;
self.advance(); // (
let mut depth = 1;
@@ -1904,11 +2003,22 @@ impl Parser {
}
}
let is_assign = self.peek() == &Token::OpEq;
self.pos = saved; // rewind
self.pos = saved; // rewind to '('
if is_assign {
// subscript assignment: `name(idx) = expr;`
let lv = self.parse_lvalue()?;
// Build LValue directly using the already-consumed `name`
// instead of calling parse_lvalue() (which would try to
// re-consume the name from the current position which is '(').
let qname = QualifiedName::simple(name.clone());
let mut indices = Vec::new();
self.advance(); // consume '('
indices.push(self.parse_expression()?);
while self.eat(&Token::Colon) {
indices.push(self.parse_expression()?);
}
self.eat(&Token::RParen);
let lv = LValue::Index(qname, indices);
self.expect(&Token::OpEq)?;
let value = self.parse_expression()?;
self.eat_semicolon();
@@ -2221,7 +2331,9 @@ impl Parser {
fn parse_builtin_expr(&mut self) -> Result<Expression, LowerError> {
let bif_tok = self.advance();
self.expect(&Token::LParen)?;
self.expect(&Token::LParen).map_err(|e| LowerError::new(format!(
"built-in function {:?}: {}", bif_tok, e.message
)))?;
let bif = match bif_tok {
Token::BifLen => {
let e = self.parse_expression()?;
@@ -2277,6 +2389,11 @@ impl Parser {
self.eat(&Token::RParen);
BuiltIn::Error
}
Token::BifElem => {
let e = self.parse_expression()?;
self.eat(&Token::RParen);
BuiltIn::Elem(Box::new(e))
}
Token::BifSize => {
let e = self.parse_expression()?;
self.eat(&Token::RParen);

View File

@@ -36,7 +36,6 @@
use std::{
fs,
path::PathBuf,
process,
};
@@ -44,6 +43,97 @@ use std::{
use clap::Parser as ClapParser;
use rust_langrpg::{codegen, load_grammar, lower::lower, parse_as};
// ─────────────────────────────────────────────────────────────────────────────
// BNF pre-processing helper
// ─────────────────────────────────────────────────────────────────────────────
/// Uppercase all keyword-like tokens in `source` while preserving the content
/// of string literals, line comments, and block comments unchanged.
///
/// This lets the BNF grammar (which uses uppercase terminal literals) validate
/// RPG IV source that uses mixed-case keywords such as `Ctl-Opt` or `Dcl-S`.
///
/// Regions copied verbatim:
///
/// * `// …` up to (not including) the end of the line,
/// * `/* … */`, or up to the end of input when the comment is never closed,
/// * `'…'` string literals, where `''` is an escaped quote; an unterminated
///   literal runs to the end of input.
///
/// Everything else is copied through with identifier characters converted by
/// `to_ascii_uppercase`, so only ASCII letters change.
fn uppercase_keywords_for_bnf(source: &str) -> String {
    /// Characters that may start or continue a name.
    fn is_name_symbol(c: char) -> bool {
        matches!(c, '_' | '@' | '#' | '$')
    }

    let chars: Vec<char> = source.chars().collect();
    let mut out = String::with_capacity(source.len());
    let mut i = 0;

    while i < chars.len() {
        let cur = chars[i];
        let next = chars.get(i + 1).copied();

        // Line comment // … \n — copy verbatim
        if cur == '/' && next == Some('/') {
            while i < chars.len() && chars[i] != '\n' {
                out.push(chars[i]);
                i += 1;
            }
            continue;
        }

        // Block comment /* … */ — copy verbatim
        if cur == '/' && next == Some('*') {
            out.push_str("/*");
            i += 2;
            // Scan to the very end of input: if the comment is never closed,
            // every remaining character (including the last one) still
            // belongs to the comment and must not be uppercased.
            while i < chars.len() {
                if chars[i] == '*' && chars.get(i + 1) == Some(&'/') {
                    out.push_str("*/");
                    i += 2;
                    break;
                }
                out.push(chars[i]);
                i += 1;
            }
            continue;
        }

        // String literal '…' — copy verbatim (including '' escape)
        if cur == '\'' {
            out.push(cur);
            i += 1;
            while i < chars.len() {
                let c = chars[i];
                out.push(c);
                i += 1;
                if c == '\'' {
                    // '' is an escaped quote — keep going
                    if chars.get(i) == Some(&'\'') {
                        out.push('\'');
                        i += 1;
                    } else {
                        break;
                    }
                }
            }
            continue;
        }

        // Identifier / keyword — uppercase it so the BNF terminals match
        if cur.is_alphabetic() || is_name_symbol(cur) {
            while i < chars.len() {
                let c = chars[i];
                // A hyphen belongs to the word only when a letter follows,
                // as in `Dcl-S` or `End-Proc`.
                let joins_keyword =
                    c == '-' && chars.get(i + 1).map_or(false, |n| n.is_alphabetic());
                if !(c.is_alphanumeric() || is_name_symbol(c) || joins_keyword) {
                    break;
                }
                out.push(c.to_ascii_uppercase());
                i += 1;
            }
            continue;
        }

        // Everything else (operators, punctuation, whitespace, digits)
        out.push(cur);
        i += 1;
    }

    out
}
// ─────────────────────────────────────────────────────────────────────────────
// CLI definition
// ─────────────────────────────────────────────────────────────────────────────
@@ -127,16 +217,82 @@ fn main() {
};
// ── BNF validation ────────────────────────────────────────────────────
let tree_opt = parse_as(&bnf_parser, source_text.trim(), "program")
.or_else(|| parse_as(&bnf_parser, source_text.trim(), "source-file"));
// RPG IV keywords are case-insensitive, but the BNF grammar uses
// uppercase terminal literals. Normalise the source before checking.
let normalised = uppercase_keywords_for_bnf(source_text.trim());
let tree_opt = parse_as(&bnf_parser, normalised.trim(), "program")
.or_else(|| parse_as(&bnf_parser, normalised.trim(), "source-file"));
if tree_opt.is_none() {
// BNF validation is a structural sanity-check. Emit a warning so
// the developer knows something looks off, but continue with the
// lowering pass which is more permissive and gives better errors.
eprintln!(
"error: '{}' did not match the RPG IV grammar",
"warning: '{}' did not fully match the RPG IV grammar\
attempting to compile anyway",
source_path.display()
);
any_error = true;
continue;
// ── Helpful diagnostics ──────────────────────────────────────────
// Scan for the first line the BNF cannot classify to give the user
// a concrete hint about what caused the mismatch.
let top_level_rules = &[
"control-spec",
"standalone-decl",
"constant-decl",
"data-structure-decl",
"file-decl",
"procedure",
"subroutine",
"statement",
];
'outer: for (lineno, raw_line) in source_text.lines().enumerate() {
let trimmed = raw_line.trim();
let norm_check = trimmed.to_ascii_uppercase();
// Skip blanks, comments, compiler directives, and lines that
// introduce multi-line constructs (DCL-PROC, END-PROC, DCL-DS,
// END-DS, DCL-PI, END-PI, BEG-SR, END-SR) — these will never
// match a single-line grammar rule and are not errors.
if trimmed.is_empty()
|| trimmed.starts_with("//")
|| trimmed.starts_with("/*")
|| trimmed.starts_with("**")
|| norm_check.starts_with("DCL-PROC")
|| norm_check.starts_with("END-PROC")
|| norm_check.starts_with("DCL-DS")
|| norm_check.starts_with("END-DS")
|| norm_check.starts_with("DCL-PI")
|| norm_check.starts_with("END-PI")
|| norm_check.starts_with("BEG-SR")
|| norm_check.starts_with("END-SR")
{
continue;
}
// Strip inline line comments before BNF matching so that
// `fib(1) = 0; // some comment` doesn't cause a false positive.
let trimmed_no_comment = if let Some(idx) = trimmed.find("//") {
trimmed[..idx].trim_end()
} else {
trimmed
};
let norm_line = uppercase_keywords_for_bnf(trimmed_no_comment);
let mut matched = false;
for rule in top_level_rules {
if parse_as(&bnf_parser, norm_line.trim(), rule).is_some() {
matched = true;
break;
}
}
if !matched {
eprintln!(
" hint (line {}): unrecognised grammar construct: {:?}",
lineno + 1,
if trimmed.len() > 80 { &trimmed[..80] } else { trimmed }
);
break 'outer;
}
}
// Fall through — try lowering anyway.
}
// ── --emit-tree: print parse tree and stop ────────────────────────────

View File

@@ -1,9 +1,16 @@
<wsc> ::= ' ' | ' ' | '
' | '
' | '
'
<ws> ::= <wsc> | <wsc> <ws>
<opt-ws> ::= <ws> | ''
<program> ::= <opt-ws> <program-body> <opt-ws>
| <opt-ws> <free-directive> <opt-ws> <program-body> <opt-ws>
| <opt-ws> <free-directive> <opt-ws>
<free-directive> ::= '**FREE'
| '**free'
| '**Free'
<program-body> ::= <declaration-section> <opt-ws> <procedure-list>
| <declaration-section>
@@ -24,7 +31,9 @@
| <procedure>
<procedure> ::= 'DCL-PROC' <ws> <identifier> <opt-ws> ';' <opt-ws> <procedure-body> <opt-ws> 'END-PROC' <opt-ws> ';'
| 'DCL-PROC' <ws> <identifier> <opt-ws> ';' <opt-ws> <procedure-body> <opt-ws> 'END-PROC' <ws> <identifier> <opt-ws> ';'
| 'DCL-PROC' <ws> <identifier> <ws> <proc-keyword-list> <opt-ws> ';' <opt-ws> <procedure-body> <opt-ws> 'END-PROC' <opt-ws> ';'
| 'DCL-PROC' <ws> <identifier> <ws> <proc-keyword-list> <opt-ws> ';' <opt-ws> <procedure-body> <opt-ws> 'END-PROC' <ws> <identifier> <opt-ws> ';'
<proc-keyword-list> ::= <proc-keyword> <ws> <proc-keyword-list>
| <proc-keyword>