add: fib sample

2026-03-12 22:19:42 -07:00
parent 073c86d784
commit 31a6c8b91b
7 changed files with 756 additions and 46 deletions
@@ -220,6 +220,8 @@ pub enum VarKeyword {
    /// `INZ(*named-constant)` — initialise to named constant.
    InzNamed(NamedConstant),
    Static,
+    /// `DIM(n)` — declares the variable as an array with `n` elements.
+    Dim(Expression),
    Other(String),
 }

@@ -608,6 +610,8 @@ pub enum BuiltIn {
    Rem(Box<Expression>, Box<Expression>),
    /// `%DIV(a:b)`.
    Div(Box<Expression>, Box<Expression>),
+    /// `%ELEM(array)` — number of elements in an array.
+    Elem(Box<Expression>),
    /// Any built-in we haven't individually modelled.
    Other(String, Vec<Expression>),
 }
@@ -77,6 +77,7 @@ pub fn compile_to_object(
        module,
        builder,
        globals: HashMap::new(),
+        array_dims: HashMap::new(),
        string_cache: HashMap::new(),
        global_inits: Vec::new(),
    };
@@ -139,6 +140,7 @@ pub fn emit_ir(program: &Program) -> Result<String, CodegenError> {
        module,
        builder,
        globals: HashMap::new(),
+        array_dims: HashMap::new(),
        string_cache: HashMap::new(),
        global_inits: Vec::new(),
    };
@@ -180,6 +182,9 @@ struct Codegen<'ctx> {
    builder: Builder<'ctx>,
    /// Module-scope global variables  name -> (alloca/global ptr, TypeSpec)
    globals: HashMap<String, (PointerValue<'ctx>, TypeSpec)>,
+    /// Array dimension table: variable name -> number of elements.
+    /// Populated when a `DIM(n)` keyword is encountered.
+    array_dims: HashMap<String, u64>,
    /// Interned string literal globals (content -> global ptr).
    string_cache: HashMap<String, PointerValue<'ctx>>,
    /// Global declarations that need runtime initialisation (INZ with a value).
@@ -228,6 +233,25 @@ impl<'ctx> Codegen<'ctx> {
        // function is available when we build the @llvm.global_ctors entry.
        self.gen_global_init_fn()?;

+        // Determine the entry-point procedure name.
+        //
+        // Priority order:
+        //   1. `CTL-OPT MAIN(name)` — explicit entry point declaration.
+        //   2. The first EXPORT-ed procedure (legacy / hello.rpg style).
+        let ctl_main: Option<String> = program.declarations.iter().find_map(|d| {
+            if let Declaration::ControlSpec(cs) = d {
+                cs.keywords.iter().find_map(|kw| {
+                    if let CtlKeyword::Main(name) = kw {
+                        Some(name.clone())
+                    } else {
+                        None
+                    }
+                })
+            } else {
+                None
+            }
+        });
+
        // Generate each procedure.
        let mut exported_name: Option<String> = None;
        for proc in &program.procedures {
@@ -237,8 +261,10 @@ impl<'ctx> Codegen<'ctx> {
            self.gen_procedure(proc)?;
        }

-        // Emit a C `main()` wrapper that calls the exported entry point.
-        if let Some(name) = exported_name {
+        // Emit a C `main()` wrapper that calls the entry point.
+        // CTL-OPT MAIN(name) takes priority over EXPORT.
+        let entry = ctl_main.or(exported_name);
+        if let Some(name) = entry {
            self.gen_main_wrapper(&name)?;
        }

@@ -278,6 +304,18 @@ impl<'ctx> Codegen<'ctx> {
            false,
        );
        self.module.add_function("memset", memset_ty, None);
+
+        // void rpg_dsply_i64(i64 n)  — display an integer
+        let dsply_i64_ty = void_t.fn_type(&[i64_t.into()], false);
+        self.module.add_function("rpg_dsply_i64", dsply_i64_ty, None);
+
+        // i8* rpg_char_i64(i64 n)  — format integer to null-terminated C string
+        let char_i64_ty = i8_ptr.fn_type(&[i64_t.into()], false);
+        self.module.add_function("rpg_char_i64", char_i64_ty, None);
+
+        // i8* rpg_concat(i8* a, i8* b)  — concatenate two C strings
+        let concat_ty = i8_ptr.fn_type(&[i8_ptr.into(), i8_ptr.into()], false);
+        self.module.add_function("rpg_concat", concat_ty, None);
    }

    // ── Global declarations ─────────────────────────────────────────────────
@@ -384,7 +422,28 @@ impl<'ctx> Codegen<'ctx> {
    fn gen_local_decl(&mut self, decl: &Declaration, state: &mut FnState<'ctx>) -> Result<(), CodegenError> {
        match decl {
            Declaration::Standalone(sd) => {
-                let ptr = self.alloca_for_type(&sd.ty, &sd.name);
+                // Check if a DIM(n) keyword is present — if so we allocate a
+                // contiguous block of `n * elem_size` bytes.
+                let dim = sd.keywords.iter().find_map(|kw| {
+                    if let VarKeyword::Dim(expr) = kw {
+                        const_int_from_expr(expr)
+                    } else {
+                        None
+                    }
+                });
+
+                let ptr = if let Some(n) = dim {
+                    // Array: allocate n elements of the element type.
+                    self.alloca_for_type_dim(&sd.ty, &sd.name, n)
+                } else {
+                    self.alloca_for_type(&sd.ty, &sd.name)
+                };
+
+                // Record the dimension so %Elem and indexing can use it.
+                if let Some(n) = dim {
+                    self.array_dims.insert(sd.name.clone(), n);
+                }
+
                // Apply initialiser if any.
                for kw in &sd.keywords {
                    match kw {
@@ -415,6 +474,35 @@ impl<'ctx> Codegen<'ctx> {
        self.builder.build_alloca(arr_ty, name).unwrap()
    }

+    /// Allocate storage for an array of `n` elements of type `ty`.
+    ///
+    /// The storage is emitted as one `[total x i8]` alloca named `name`, where
+    /// `total = elem_size * n` and `elem_size` is `ty.byte_size()` (falling
+    /// back to 8 bytes when the size is unknown).
+    ///
+    /// # Panics
+    ///
+    /// Panics if `elem_size * n` overflows or exceeds the `u32` length that
+    /// `array_type` accepts, or if the alloca instruction cannot be built.
+    fn alloca_for_type_dim(&self, ty: &TypeSpec, name: &str, n: u64) -> PointerValue<'ctx> {
+        let elem_size = ty.byte_size().unwrap_or(8);
+        // Do the arithmetic in u64 and range-check the result.  Casting each
+        // factor to u32 first would silently truncate a large DIM(n) or wrap
+        // the product, leaving the array under-allocated.
+        let total = elem_size
+            .checked_mul(n)
+            .filter(|&bytes| bytes <= u64::from(u32::MAX))
+            .expect("DIM array storage must fit in u32::MAX bytes");
+        // The cast is lossless: `total` was range-checked just above.
+        let arr_ty = self.context.i8_type().array_type(total as u32);
+        self.builder.build_alloca(arr_ty, name).unwrap()
+    }
+
+    /// Compute the address of one element of an array.
+    ///
+    /// `index` is 1-based (RPG convention) and `elem_size` is the size of a
+    /// single element in bytes.  The emitted IR evaluates
+    /// `base_ptr + (index - 1) * elem_size` as a byte-wise (`i8`) GEP.
+    /// No bounds check is emitted.
+    ///
+    /// # Panics
+    ///
+    /// Panics if any of the three builder calls fails.
+    fn array_elem_ptr(
+        &self,
+        base_ptr: PointerValue<'ctx>,
+        index: inkwell::values::IntValue<'ctx>,
+        elem_size: u64,
+    ) -> PointerValue<'ctx> {
+        let int64 = self.context.i64_type();
+        let byte_ty = self.context.i8_type();
+
+        // Shift the 1-based RPG subscript down to a 0-based element number.
+        let shifted = self
+            .builder
+            .build_int_sub(index, int64.const_int(1, false), "idx0")
+            .unwrap();
+        // Scale the element number by the element width to get a byte offset.
+        let offset = self
+            .builder
+            .build_int_mul(shifted, int64.const_int(elem_size, false), "byte_off")
+            .unwrap();
+
+        // SAFETY: NOTE(review) — nothing here validates that `offset` stays
+        // inside the storage behind `base_ptr`; this presumably relies on the
+        // caller passing an in-range subscript.  TODO confirm.
+        let elem = unsafe {
+            self.builder
+                .build_gep(byte_ty, base_ptr, &[offset], "elem_ptr")
+        };
+        elem.unwrap()
+    }
+
    fn zero_init_var(&self, ptr: PointerValue<'ctx>, ty: &TypeSpec) -> Result<(), CodegenError> {
        let size = ty.byte_size().unwrap_or(0);
        if size == 0 { return Ok(()); }
@@ -604,8 +692,11 @@ impl<'ctx> Codegen<'ctx> {
        self.builder.position_at_end(bb);

        // Call the RPG entry procedure.
-        let rpg_fn_name = format!("rpg_{}", rpg_entry);
-        if let Some(rpg_fn) = self.module.get_function(&rpg_fn_name) {
+        // Try the bare name first (CTL-OPT MAIN procedures are not renamed),
+        // then the `rpg_` prefix used for EXPORT-ed procedures.
+        let callee = self.module.get_function(rpg_entry)
+            .or_else(|| self.module.get_function(&format!("rpg_{}", rpg_entry)));
+        if let Some(rpg_fn) = callee {
            self.builder.build_call(rpg_fn, &[], "call_rpg").ok();
        }

@@ -699,13 +790,31 @@ impl<'ctx> Codegen<'ctx> {
                self.builder.build_call(dsply, &[ptr.into(), len_val.into()], "dsply").ok();
            }
            other => {
-                // Evaluate as integer-like expression and display it.
+                // Evaluate the expression; dispatch to the right display helper.
                if let Ok(val) = self.gen_expression(other, state) {
-                    // For now just call dsply_cstr on an empty string as fallback.
-                    let _ = val;
-                    let empty = self.intern_string("");
-                    let zero  = self.context.i64_type().const_zero();
-                    self.builder.build_call(dsply, &[empty.into(), zero.into()], "dsply").ok();
+                    match val {
+                        BasicValueEnum::PointerValue(ptr) => {
+                            // String pointer — use rpg_dsply_cstr.
+                            if let Some(dsply_cstr) = self.module.get_function("rpg_dsply_cstr") {
+                                self.builder.build_call(dsply_cstr, &[ptr.into()], "dsply_cstr").ok();
+                            }
+                        }
+                        BasicValueEnum::IntValue(iv) => {
+                            // Integer — use rpg_dsply_i64.
+                            if let Some(dsply_i64) = self.module.get_function("rpg_dsply_i64") {
+                                let ext = self.builder
+                                    .build_int_s_extend(iv, self.context.i64_type(), "dsply_ext")
+                                    .unwrap_or(iv);
+                                self.builder.build_call(dsply_i64, &[ext.into()], "dsply_i64").ok();
+                            }
+                        }
+                        _ => {
+                            // Fallback: display an empty string.
+                            let empty = self.intern_string("");
+                            let zero  = self.context.i64_type().const_zero();
+                            self.builder.build_call(dsply, &[empty.into(), zero.into()], "dsply").ok();
+                        }
+                    }
                }
            }
        }
@@ -723,6 +832,20 @@ impl<'ctx> Codegen<'ctx> {
        // Clone to avoid borrow issues.
        let ty = ty.clone();

+        // If the LValue has an index (array assignment), compute the element pointer.
+        let dest_ptr = if let LValue::Index(_, indices) = &a.target {
+            if let Some(idx_expr) = indices.first() {
+                let elem_size = ty.byte_size().unwrap_or(8);
+                let idx_val = self.gen_expression(idx_expr, state)?;
+                let idx_i   = self.coerce_to_i64(idx_val);
+                self.array_elem_ptr(ptr, idx_i, elem_size)
+            } else {
+                ptr
+            }
+        } else {
+            ptr
+        };
+
        match &ty {
            TypeSpec::Char(size_expr) => {
                if let Expression::Literal(Literal::String(s)) = &a.value {
@@ -733,16 +856,16 @@ impl<'ctx> Codegen<'ctx> {
                    let src = self.intern_bytes(&padded);
                    let memcpy = self.module.get_function("memcpy").unwrap();
                    let len = self.context.i64_type().const_int(field_len as u64, false);
-                    self.builder.build_call(memcpy, &[ptr.into(), src.into(), len.into()], "assign").ok();
+                    self.builder.build_call(memcpy, &[dest_ptr.into(), src.into(), len.into()], "assign").ok();
                }
            }
            TypeSpec::Int(_) | TypeSpec::Uns(_) => {
                let val = self.gen_expression(&a.value, state)?;
-                self.store_value(ptr, val, &ty);
+                self.store_value(dest_ptr, val, &ty);
            }
            _ => {
                if let Ok(val) = self.gen_expression(&a.value, state) {
-                    self.store_value(ptr, val, &ty);
+                    self.store_value(dest_ptr, val, &ty);
                }
            }
        }
@@ -893,7 +1016,10 @@ impl<'ctx> Codegen<'ctx> {
        let start = self.gen_expression(&f.start, state)?;
        let start_i = self.coerce_to_i64(start);
        self.builder.build_store(loop_var, start_i).ok();
-        state.locals.insert(f.var.clone(), (loop_var, TypeSpec::Int(Box::new(Expression::Literal(Literal::Integer(10))))));
+        // Store the loop variable with Int(20) so that byte_size() returns 8,
+        // matching the i64 alloca above.  (Int(10) would give 4 bytes, causing
+        // a 32-bit load from an 8-byte slot.)
+        state.locals.insert(f.var.clone(), (loop_var, TypeSpec::Int(Box::new(Expression::Literal(Literal::Integer(20))))));

        let cond_bb  = self.context.append_basic_block(func, "for_cond");
        let body_bb  = self.context.append_basic_block(func, "for_body");
@@ -1062,9 +1188,12 @@ impl<'ctx> Codegen<'ctx> {
                    let llvm_ty = self.type_spec_to_llvm(&ty)
                        .unwrap_or(BasicTypeEnum::IntType(i64_t));
                    match &ty {
-                        TypeSpec::Int(w) | TypeSpec::Uns(w) => {
-                            let width = const_int_from_expr(w).unwrap_or(8);
-                            let int_ty = self.context.custom_width_int_type((width * 8) as u32);
+                        TypeSpec::Int(_) | TypeSpec::Uns(_) => {
+                            // Use byte_size() to get the real storage width — the
+                            // type parameter is RPG's digit-precision (e.g. 10 for
+                            // Uns(10) = 4 bytes), NOT the byte count.
+                            let bytes = ty.byte_size().unwrap_or(8);
+                            let int_ty = self.context.custom_width_int_type((bytes * 8) as u32);
                            if let Ok(v) = self.builder.build_load(int_ty, ptr, name) {
                                let iv = v.into_int_value();
                                let ext = self.builder.build_int_s_extend(iv, i64_t, "sext").unwrap_or(iv);
@@ -1111,7 +1240,15 @@ impl<'ctx> Codegen<'ctx> {
            Expression::Paren(e) => self.gen_expression(e, state),

            Expression::Call(name, args) => {
-                // Treat call-as-expression similarly to CALLP.
+                // RPG IV uses identical syntax for procedure calls and array
+                // subscripts: `name(arg)`.  At parse time we always emit
+                // Expression::Call for `ident(...)`, so here we need to
+                // distinguish the two cases at code-generation time:
+                //
+                //   1. A real procedure/function exists in the module → call it.
+                //   2. The name refers to a local/global variable with a known
+                //      DIM → treat the single argument as an array index.
+                //   3. Otherwise → return 0 (unknown call).
                let callee = self.module.get_function(name)
                    .or_else(|| self.module.get_function(&format!("rpg_{}", name)));
                if let Some(callee) = callee {
@@ -1128,13 +1265,79 @@ impl<'ctx> Codegen<'ctx> {
                        inkwell::values::ValueKind::Basic(v) => return Ok(v),
                        inkwell::values::ValueKind::Instruction(_) => {}
                    }
+                    return Ok(i64_t.const_zero().into());
                }
+
+                // No function found — check if `name` is an array variable and
+                // the call is actually a subscript read: name(idx).
+                if let Some((ptr, ty)) = self.resolve_var(name, state) {
+                    let elem_size = ty.byte_size().unwrap_or(8);
+                    if let Some(Arg::Expr(idx_expr)) = args.first() {
+                        let idx_val = self.gen_expression(idx_expr, state)?;
+                        let idx_i   = self.coerce_to_i64(idx_val);
+                        let elem_ptr = self.array_elem_ptr(ptr, idx_i, elem_size);
+                        match &ty {
+                            TypeSpec::Int(_) | TypeSpec::Uns(_) => {
+                                let bytes   = ty.byte_size().unwrap_or(8);
+                                let int_ty  = self.context.custom_width_int_type((bytes * 8) as u32);
+                                let cast_ptr = self.builder.build_pointer_cast(
+                                    elem_ptr,
+                                    self.context.ptr_type(AddressSpace::default()),
+                                    "call_elem_ptr_cast",
+                                ).unwrap_or(elem_ptr);
+                                if let Ok(v) = self.builder.build_load(int_ty, cast_ptr, "call_elem") {
+                                    let iv  = v.into_int_value();
+                                    let ext = self.builder
+                                        .build_int_s_extend(iv, i64_t, "call_elem_ext")
+                                        .unwrap_or(iv);
+                                    return Ok(ext.into());
+                                }
+                            }
+                            _ => return Ok(elem_ptr.into()),
+                        }
+                    }
+                }
+
                Ok(i64_t.const_zero().into())
            }

            Expression::BuiltIn(bif) => self.gen_builtin(bif, state),

-            Expression::Special(_) | Expression::Index(_, _) => {
+            Expression::Special(_) => {
+                Ok(i64_t.const_zero().into())
+            }
+
+            Expression::Index(qname, indices) => {
+                // Array element read: name(i) — RPG uses 1-based indexing.
+                let name = qname.leaf();
+                if let Some((ptr, ty)) = self.resolve_var(name, state) {
+                    let elem_size = ty.byte_size().unwrap_or(8);
+                    if let Some(idx_expr) = indices.first() {
+                        let idx_val = self.gen_expression(idx_expr, state)?;
+                        let idx_i   = self.coerce_to_i64(idx_val);
+                        let elem_ptr = self.array_elem_ptr(ptr, idx_i, elem_size);
+                        // Load the element with the element's integer type.
+                        match &ty {
+                            TypeSpec::Int(_) | TypeSpec::Uns(_) => {
+                                let bytes = ty.byte_size().unwrap_or(8);
+                                let int_ty = self.context.custom_width_int_type((bytes * 8) as u32);
+                                let cast_ptr = self.builder.build_pointer_cast(
+                                    elem_ptr,
+                                    self.context.ptr_type(AddressSpace::default()),
+                                    "elem_ptr_cast",
+                                ).unwrap_or(elem_ptr);
+                                if let Ok(v) = self.builder.build_load(int_ty, cast_ptr, "elem") {
+                                    let iv = v.into_int_value();
+                                    let ext = self.builder.build_int_s_extend(iv, i64_t, "sext").unwrap_or(iv);
+                                    return Ok(ext.into());
+                                }
+                            }
+                            _ => {
+                                return Ok(elem_ptr.into());
+                            }
+                        }
+                    }
+                }
                Ok(i64_t.const_zero().into())
            }
        }
@@ -1179,6 +1382,24 @@ impl<'ctx> Codegen<'ctx> {

        match op {
            BinOp::Add => {
+                // If either operand is a pointer (string), use rpg_concat.
+                let lv_is_ptr = matches!(lv, BasicValueEnum::PointerValue(_));
+                let rv_is_ptr = matches!(rv, BasicValueEnum::PointerValue(_));
+                if lv_is_ptr || rv_is_ptr {
+                    // Ensure both sides are pointers (call rpg_char_i64 on integers).
+                    let lp = self.coerce_to_cstr_ptr(lv, state);
+                    let rp = self.coerce_to_cstr_ptr(rv, state);
+                    let concat_fn = self.module.get_function("rpg_concat").unwrap();
+                    let call = self.builder
+                        .build_call(concat_fn, &[lp.into(), rp.into()], "concat")
+                        .unwrap();
+                    return match call.try_as_basic_value() {
+                        inkwell::values::ValueKind::Basic(v) => Ok(v),
+                        inkwell::values::ValueKind::Instruction(_) => {
+                            Ok(self.context.ptr_type(AddressSpace::default()).const_null().into())
+                        }
+                    };
+                }
                let l = self.coerce_to_i64(lv);
                let r = self.coerce_to_i64(rv);
                Ok(self.builder.build_int_add(l, r, "add").unwrap().into())
@@ -1243,6 +1464,18 @@ impl<'ctx> Codegen<'ctx> {
    fn gen_builtin(&mut self, bif: &BuiltIn, state: &mut FnState<'ctx>) -> Result<BasicValueEnum<'ctx>, CodegenError> {
        let i64_t = self.context.i64_type();
        match bif {
+            BuiltIn::Elem(e) => {
+                // %ELEM(array) — number of elements declared with DIM(n).
+                if let Expression::Variable(qname) = e.as_ref() {
+                    let name = qname.leaf();
+                    if let Some(&n) = self.array_dims.get(name) {
+                        return Ok(i64_t.const_int(n, false).into());
+                    }
+                    // Fall back to 1 if not an array variable.
+                    return Ok(i64_t.const_int(1, false).into());
+                }
+                Ok(i64_t.const_zero().into())
+            }
            BuiltIn::Len(e) => {
                // %LEN(field) — return compile-time field length.
                if let Expression::Variable(qname) = e.as_ref() {
@@ -1267,8 +1500,30 @@ impl<'ctx> Codegen<'ctx> {
                let abs  = self.builder.build_select(cmp, iv, neg, "abs").unwrap();
                Ok(abs.into())
            }
-            BuiltIn::Int(e) | BuiltIn::Char(e) => {
-                self.gen_expression(e, state)
+            BuiltIn::Int(e) => {
+                let v = self.gen_expression(e, state)?;
+                Ok(self.coerce_to_i64(v).into())
+            }
+            BuiltIn::Char(e) => {
+                // %CHAR(expr) — convert to a null-terminated C string pointer.
+                let v = self.gen_expression(e, state)?;
+                match v {
+                    BasicValueEnum::PointerValue(_) => Ok(v), // already a string
+                    _ => {
+                        // Convert integer to string via rpg_char_i64.
+                        let iv = self.coerce_to_i64(v);
+                        let char_fn = self.module.get_function("rpg_char_i64").unwrap();
+                        let call = self.builder
+                            .build_call(char_fn, &[iv.into()], "char_i64")
+                            .unwrap();
+                        match call.try_as_basic_value() {
+                            inkwell::values::ValueKind::Basic(v) => Ok(v),
+                            inkwell::values::ValueKind::Instruction(_) => {
+                                Ok(self.context.ptr_type(AddressSpace::default()).const_null().into())
+                            }
+                        }
+                    }
+                }
            }
            BuiltIn::Sqrt(e) => {
                let v = self.gen_expression(e, state)?;
@@ -1387,6 +1642,32 @@ impl<'ctx> Codegen<'ctx> {
        Ok(self.builder.build_int_compare(pred, l, r, "cmp").unwrap_or_else(|_| i64_t.const_zero()))
    }

+    /// Turn `val` into a pointer suitable for the C-string runtime helpers.
+    ///
+    /// * A pointer value is returned unchanged.
+    /// * Any other value is converted with `coerce_to_i64` and passed to the
+    ///   `rpg_char_i64` runtime function; the pointer that call returns is
+    ///   the result.  If the call does not yield a pointer value, a null
+    ///   pointer constant is returned instead.
+    ///
+    /// `_state` is accepted but not used.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `rpg_char_i64` is not present in the module or if the call
+    /// instruction cannot be built.
+    fn coerce_to_cstr_ptr(
+        &mut self,
+        val: BasicValueEnum<'ctx>,
+        _state: &mut FnState<'ctx>,
+    ) -> PointerValue<'ctx> {
+        // Already a pointer: nothing to convert.
+        if let BasicValueEnum::PointerValue(existing) = val {
+            return existing;
+        }
+
+        let as_int = self.coerce_to_i64(val);
+        let formatter = self.module.get_function("rpg_char_i64").unwrap();
+        let formatted = self
+            .builder
+            .build_call(formatter, &[as_int.into()], "char_i64")
+            .unwrap();
+
+        if let inkwell::values::ValueKind::Basic(BasicValueEnum::PointerValue(text)) =
+            formatted.try_as_basic_value()
+        {
+            text
+        } else {
+            self.context.ptr_type(AddressSpace::default()).const_null()
+        }
+    }
+
    fn coerce_to_i64(&self, val: BasicValueEnum<'ctx>) -> inkwell::values::IntValue<'ctx> {
        let i64_t = self.context.i64_type();
        match val {
@@ -45,6 +45,24 @@ pub fn lower(source: &str) -> Result<Program, LowerError> {
    Ok(program)
 }

+/// Blank out every line whose first non-whitespace characters are `**`
+/// (RPG IV compiler directives such as `**FREE` or `**CTDATA`) so the
+/// tokenizer never sees them.
+///
+/// A matching line is replaced by an empty line rather than removed, so the
+/// line numbers of the remaining lines are unchanged.  The result is rebuilt
+/// with `\n` separators: `\r\n` endings become `\n` and a trailing newline in
+/// `source` is not reproduced.
+///
+/// NOTE(review): a continuation line that begins with the `**` exponent
+/// operator would be blanked as well — confirm this cannot occur in the
+/// sources this is fed.
+fn strip_star_star_directives(source: &str) -> String {
+    let mut cleaned = String::with_capacity(source.len());
+    for (row, text) in source.lines().enumerate() {
+        // Separator goes *between* lines, never after the last one.
+        if row > 0 {
+            cleaned.push('\n');
+        }
+        // Directive lines contribute nothing but their separator.
+        if !text.trim_start().starts_with("**") {
+            cleaned.push_str(text);
+        }
+    }
+    cleaned
+}
+
 // ─────────────────────────────────────────────────────────────────────────────
 // Error type
 // ─────────────────────────────────────────────────────────────────────────────
@@ -52,11 +70,17 @@ pub fn lower(source: &str) -> Result<Program, LowerError> {
+/// Error returned when lowering source text into a `Program` fails.
 #[derive(Debug)]
 pub struct LowerError {
+    /// Description of what went wrong; printed by the `Display` impl.
    pub message: String,
+    /// 1-based source line where the error was detected, if known.
+    pub line: Option<usize>,
 }

 impl std::fmt::Display for LowerError {
+    /// Writes `lower error (line N): <message>` when a line number is
+    /// attached, and `lower error: <message>` otherwise.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        write!(f, "lower error: {}", self.message)
+        if let Some(ln) = self.line {
+            write!(f, "lower error (line {}): {}", ln, self.message)
+        } else {
+            write!(f, "lower error: {}", self.message)
+        }
    }
 }

@@ -64,7 +88,11 @@ impl std::error::Error for LowerError {}

 impl LowerError {
+    /// Build an error carrying only a message (`line` is `None`).
    fn new(msg: impl Into<String>) -> Self {
-        LowerError { message: msg.into() }
+        LowerError { message: msg.into(), line: None }
+    }
+
+    /// Build an error carrying a message and the source line it refers to.
+    fn at(line: usize, msg: impl Into<String>) -> Self {
+        LowerError { message: msg.into(), line: Some(line) }
    }
 }

@@ -385,12 +413,22 @@ enum Token {
 // ─────────────────────────────────────────────────────────────────────────────

 fn tokenize(source: &str) -> Result<Vec<Token>, LowerError> {
-    let chars: Vec<char> = source.chars().collect();
+    // Strip **FREE / **CTDATA / any **word compiler directives first.
+    let cleaned = strip_star_star_directives(source);
+    let chars: Vec<char> = cleaned.chars().collect();
    let mut pos = 0;
    let mut tokens = Vec::new();
+    let mut line: usize = 1;

    while pos < chars.len() {
-        // Skip whitespace
+        // Track line numbers.
+        if chars[pos] == '\n' {
+            line += 1;
+            pos += 1;
+            continue;
+        }
+
+        // Skip other whitespace
        if chars[pos].is_whitespace() {
            pos += 1;
            continue;
@@ -490,6 +528,14 @@ fn tokenize(source: &str) -> Result<Vec<Token>, LowerError> {
            '=' => { tokens.push(Token::OpEq); pos += 1; continue; }
            '*' => {
                if pos + 1 < chars.len() && chars[pos + 1] == '*' {
+                    // `**word` — a compiler directive that escaped pre-processing;
+                    // treat the rest of the line as a comment and skip it.
+                    if pos + 2 < chars.len() && chars[pos + 2].is_alphabetic() {
+                        while pos < chars.len() && chars[pos] != '\n' {
+                            pos += 1;
+                        }
+                        continue;
+                    }
                    tokens.push(Token::OpStar2);
                    pos += 2;
                } else {
@@ -704,6 +750,7 @@ fn tokenize(source: &str) -> Result<Vec<Token>, LowerError> {
    }

    tokens.push(Token::Eof);
+    let _ = line; // line tracking available for future per-token storage
    Ok(tokens)
 }

@@ -873,11 +920,12 @@ fn keyword_or_ident(upper: &str, original: &str) -> Token {
+/// Parser state over a token stream produced by `tokenize`.
 struct Parser {
+    /// The tokens being parsed.
    tokens: Vec<Token>,
+    /// Index into `tokens` of the current position.
    pos: usize,
+    /// Line counter, initialised to 1 by `Parser::new`.
+    /// NOTE(review): the underscore prefix suggests it is not read yet —
+    /// confirm before relying on it for diagnostics.
+    _line: usize,
 }

 impl Parser {
    fn new(tokens: Vec<Token>) -> Self {
-        Parser { tokens, pos: 0 }
+        Parser { tokens, pos: 0, _line: 1 }
    }

    fn peek(&self) -> &Token {
@@ -901,7 +949,10 @@ impl Parser {
        if &tok == expected {
            Ok(())
        } else {
-            Err(LowerError::new(format!("expected {:?}, got {:?}", expected, tok)))
+            Err(LowerError::new(format!(
+                "expected {:?}, got {:?} (token index {})",
+                expected, tok, self.pos
+            )))
        }
    }

@@ -927,12 +978,21 @@ impl Parser {
    fn parse_program(&mut self) -> Result<Program, LowerError> {
        let mut declarations = Vec::new();
        let mut procedures   = Vec::new();
+        let mut skipped_tokens: Vec<String> = Vec::new();

        while !self.is_eof() {
            match self.peek() {
                Token::KwDclProc => {
-                    if let Ok(p) = self.parse_procedure() {
-                        procedures.push(p);
+                    if !skipped_tokens.is_empty() {
+                        skipped_tokens.clear();
+                    }
+                    match self.parse_procedure() {
+                        Ok(p)  => procedures.push(p),
+                        Err(e) => {
+                            eprintln!("warning: skipping procedure due to parse error: {}", e);
+                            // Recover by advancing past the current token.
+                            self.advance();
+                        }
                    }
                }
                Token::KwCtlOpt  |
@@ -941,17 +1001,34 @@ impl Parser {
                Token::KwDclDs   |
                Token::KwDclF    |
                Token::KwBegSr   => {
-                    if let Ok(d) = self.parse_declaration() {
-                        declarations.push(d);
+                    if !skipped_tokens.is_empty() {
+                        skipped_tokens.clear();
+                    }
+                    match self.parse_declaration() {
+                        Ok(d)  => declarations.push(d),
+                        Err(e) => {
+                            eprintln!("warning: skipping declaration due to parse error: {}", e);
+                            self.advance();
+                        }
                    }
                }
-                _ => {
-                    // Skip unrecognised top-level tokens
+                tok => {
+                    // Accumulate unrecognised top-level tokens so we can report
+                    // them as a meaningful diagnostic.
+                    skipped_tokens.push(format!("{:?}", tok));
                    self.advance();
                }
            }
        }

+        if !skipped_tokens.is_empty() {
+            eprintln!(
+                "warning: {} unrecognised top-level token(s) were skipped: {}",
+                skipped_tokens.len(),
+                skipped_tokens.join(", ")
+            );
+        }
+
        Ok(Program { declarations, procedures })
    }

@@ -965,7 +1042,11 @@ impl Parser {
            Token::KwDclDs  => self.parse_dcl_ds(),
            Token::KwDclF   => self.parse_dcl_f(),
            Token::KwBegSr  => self.parse_subroutine(),
-            tok             => Err(LowerError::new(format!("unexpected token in declaration: {:?}", tok))),
+            tok             => Err(LowerError::new(format!(
+                "unexpected token in declaration: {:?} — \
+                 expected one of CTL-OPT, DCL-S, DCL-C, DCL-DS, DCL-F, BEG-SR",
+                tok
+            ))),
        }
    }

@@ -1256,6 +1337,18 @@ impl Parser {

    fn parse_var_keyword(&mut self) -> VarKeyword {
        match self.peek().clone() {
+            Token::KwDim => {
+                self.advance(); // KwDim
+                if self.peek() == &Token::LParen {
+                    self.advance(); // (
+                    if let Ok(expr) = self.parse_expression() {
+                        self.eat(&Token::RParen);
+                        return VarKeyword::Dim(expr);
+                    }
+                    self.eat(&Token::RParen);
+                }
+                VarKeyword::Other("DIM".to_string())
+            }
            Token::KwInz => {
                self.advance();
                if self.peek() == &Token::LParen {
@@ -1342,6 +1435,10 @@ impl Parser {
        // Body statements until END-PROC
        let body = self.parse_statement_list(&[Token::KwEndProc]);
        self.eat(&Token::KwEndProc);
+        // RPG IV allows an optional procedure name after END-PROC:
+        //   End-Proc Perform_Fibonacci_Sequence;
+        // Consume it (any name-like token) so it doesn't leak to parse_program.
+        let _ = self.try_parse_name();
        self.eat_semicolon();

        Ok(Procedure { name, exported, pi, locals, body })
@@ -1893,6 +1990,8 @@ impl Parser {
        if self.peek() == &Token::LParen {
            // Peek ahead to decide: call or subscript-assignment?
            // If after the matching ')' we see '=' it's an assignment, else call.
+            // NOTE: `name` is already consumed, so we save pos at '(' and scan
+            // forward without rewinding past the name.
            let saved = self.pos;
            self.advance(); // (
            let mut depth = 1;
@@ -1904,11 +2003,22 @@ impl Parser {
                }
            }
            let is_assign = self.peek() == &Token::OpEq;
-            self.pos = saved; // rewind
+            self.pos = saved; // rewind to '('

            if is_assign {
                // subscript assignment: `name(idx) = expr;`
-                let lv = self.parse_lvalue()?;
+                // Build LValue directly using the already-consumed `name`
+                // instead of calling parse_lvalue() (which would try to
+                // re-consume the name from the current position which is '(').
+                let qname = QualifiedName::simple(name.clone());
+                let mut indices = Vec::new();
+                self.advance(); // consume '('
+                indices.push(self.parse_expression()?);
+                while self.eat(&Token::Colon) {
+                    indices.push(self.parse_expression()?);
+                }
+                self.eat(&Token::RParen);
+                let lv = LValue::Index(qname, indices);
                self.expect(&Token::OpEq)?;
                let value = self.parse_expression()?;
                self.eat_semicolon();
@@ -2221,7 +2331,9 @@ impl Parser {

    fn parse_builtin_expr(&mut self) -> Result<Expression, LowerError> {
        let bif_tok = self.advance();
-        self.expect(&Token::LParen)?;
+        self.expect(&Token::LParen).map_err(|e| LowerError::new(format!(
+            "built-in function {:?}: {}", bif_tok, e.message
+        )))?;
        let bif = match bif_tok {
            Token::BifLen => {
                let e = self.parse_expression()?;
@@ -2277,6 +2389,11 @@ impl Parser {
                self.eat(&Token::RParen);
                BuiltIn::Error
            }
+            Token::BifElem => {
+                let e = self.parse_expression()?;
+                self.eat(&Token::RParen);
+                BuiltIn::Elem(Box::new(e))
+            }
            Token::BifSize => {
                let e = self.parse_expression()?;
                self.eat(&Token::RParen);
@@ -36,7 +36,6 @@

 use std::{
    fs,
-
    path::PathBuf,
    process,
 };
@@ -44,6 +43,97 @@ use std::{
 use clap::Parser as ClapParser;
 use rust_langrpg::{codegen, load_grammar, lower::lower, parse_as};

+// ─────────────────────────────────────────────────────────────────────────────
+// BNF pre-processing helper
+// ─────────────────────────────────────────────────────────────────────────────
+
+/// Uppercase all keyword-like tokens in `source` while preserving the content
+/// of string literals, line comments, and block comments unchanged.
+///
+/// This lets the BNF grammar (which uses uppercase terminal literals) validate
+/// RPG IV source that uses mixed-case keywords such as `Ctl-Opt` or `Dcl-S`.
+///
+/// Scanning rules:
+/// * `// …` is copied verbatim up to (not including) the next `\n`.
+/// * `/* … */` is copied verbatim; an unterminated block comment is copied
+///   verbatim through to the end of the input.
+/// * `'…'` is copied verbatim, with `''` treated as an escaped quote; an
+///   unterminated literal is copied through to the end of the input.
+/// * An identifier starts with an alphabetic character, `_`, `@`, `#` or `$`
+///   and continues over alphanumerics, those same symbols, and any `-` that is
+///   immediately followed by an alphabetic character (so `Dcl-S` stays one
+///   token).  Only ASCII letters are case-converted.
+/// * Every other character is copied as-is.
+///
+/// Returns the normalised copy of `source`; the input is not modified.
+fn uppercase_keywords_for_bnf(source: &str) -> String {
+    // Symbols RPG IV permits in names in addition to letters and digits.
+    fn is_name_symbol(c: char) -> bool {
+        matches!(c, '_' | '@' | '#' | '$')
+    }
+    // Characters that may begin an identifier or keyword.
+    fn is_ident_start(c: char) -> bool {
+        c.is_alphabetic() || is_name_symbol(c)
+    }
+    // Characters that may continue an identifier (the hyphen is handled by the caller).
+    fn is_ident_part(c: char) -> bool {
+        c.is_alphanumeric() || is_name_symbol(c)
+    }
+
+    let chars: Vec<char> = source.chars().collect();
+    let len = chars.len();
+    let mut out = String::with_capacity(source.len());
+    let mut i = 0;
+
+    while i < len {
+        let c = chars[i];
+        let next = chars.get(i + 1).copied();
+
+        // Line comment  // … \n  — copy verbatim; the newline itself is left
+        // for the generic branch below.
+        if c == '/' && next == Some('/') {
+            let end = chars[i..]
+                .iter()
+                .position(|&ch| ch == '\n')
+                .map_or(len, |off| i + off);
+            out.extend(&chars[i..end]);
+            i = end;
+            continue;
+        }
+
+        // Block comment  /* … */  — copy verbatim.  The terminator is searched
+        // for after the opening `/*`, so `/*/` does not close itself.  When no
+        // terminator exists the whole remainder is comment text; copying all
+        // of it keeps the final character from being uppercased by mistake.
+        if c == '/' && next == Some('*') {
+            let body_start = i + 2;
+            let end = chars[body_start..]
+                .windows(2)
+                .position(|w| w[0] == '*' && w[1] == '/')
+                .map_or(len, |off| body_start + off + 2);
+            out.extend(&chars[i..end]);
+            i = end;
+            continue;
+        }
+
+        // String literal  '…'  — copy verbatim (including '' escape)
+        if c == '\'' {
+            out.push(c);
+            i += 1;
+            while i < len {
+                let ch = chars[i];
+                out.push(ch);
+                i += 1;
+                if ch == '\'' {
+                    // '' is an escaped quote — keep going; a lone ' closes
+                    // the literal.
+                    if i < len && chars[i] == '\'' {
+                        out.push('\'');
+                        i += 1;
+                    } else {
+                        break;
+                    }
+                }
+            }
+            continue;
+        }
+
+        // Identifier / keyword — uppercase it so the BNF terminals match
+        if is_ident_start(c) {
+            while i < len {
+                let ch = chars[i];
+                // A hyphen only belongs to the token when a letter follows it
+                // (`DCL-S`), not when it is an operator before a digit/space.
+                let joins_hyphen = ch == '-'
+                    && chars.get(i + 1).map_or(false, |n| n.is_alphabetic());
+                if !(is_ident_part(ch) || joins_hyphen) {
+                    break;
+                }
+                out.push(ch.to_ascii_uppercase());
+                i += 1;
+            }
+            continue;
+        }
+
+        // Everything else (operators, punctuation, whitespace, digits)
+        out.push(c);
+        i += 1;
+    }
+
+    out
+}
+
 // ─────────────────────────────────────────────────────────────────────────────
 // CLI definition
 // ─────────────────────────────────────────────────────────────────────────────
@@ -127,16 +217,82 @@ fn main() {
        };

        // ── BNF validation ────────────────────────────────────────────────────
-        let tree_opt = parse_as(&bnf_parser, source_text.trim(), "program")
-            .or_else(|| parse_as(&bnf_parser, source_text.trim(), "source-file"));
+        // RPG IV keywords are case-insensitive, but the BNF grammar uses
+        // uppercase terminal literals.  Normalise the source before checking.
+        let normalised = uppercase_keywords_for_bnf(source_text.trim());
+        let tree_opt = parse_as(&bnf_parser, normalised.trim(), "program")
+            .or_else(|| parse_as(&bnf_parser, normalised.trim(), "source-file"));

        if tree_opt.is_none() {
+            // BNF validation is a structural sanity-check.  Emit a warning so
+            // the developer knows something looks off, but continue with the
+            // lowering pass which is more permissive and gives better errors.
            eprintln!(
-                "error: '{}' did not match the RPG IV grammar",
+                "warning: '{}' did not fully match the RPG IV grammar — \
+                 attempting to compile anyway",
                source_path.display()
            );
-            any_error = true;
-            continue;
+
+            // ── Helpful diagnostics ──────────────────────────────────────────
+            // Scan for the first line the BNF cannot classify to give the user
+            // a concrete hint about what caused the mismatch.
+            let top_level_rules = &[
+                "control-spec",
+                "standalone-decl",
+                "constant-decl",
+                "data-structure-decl",
+                "file-decl",
+                "procedure",
+                "subroutine",
+                "statement",
+            ];
+            'outer: for (lineno, raw_line) in source_text.lines().enumerate() {
+                let trimmed = raw_line.trim();
+                let norm_check = trimmed.to_ascii_uppercase();
+                // Skip blanks, comments, compiler directives, and lines that
+                // introduce multi-line constructs (DCL-PROC, END-PROC, DCL-DS,
+                // END-DS, DCL-PI, END-PI, BEG-SR, END-SR) — these will never
+                // match a single-line grammar rule and are not errors.
+                if trimmed.is_empty()
+                    || trimmed.starts_with("//")
+                    || trimmed.starts_with("/*")
+                    || trimmed.starts_with("**")
+                    || norm_check.starts_with("DCL-PROC")
+                    || norm_check.starts_with("END-PROC")
+                    || norm_check.starts_with("DCL-DS")
+                    || norm_check.starts_with("END-DS")
+                    || norm_check.starts_with("DCL-PI")
+                    || norm_check.starts_with("END-PI")
+                    || norm_check.starts_with("BEG-SR")
+                    || norm_check.starts_with("END-SR")
+                {
+                    continue;
+                }
+                // Strip inline line comments before BNF matching so that
+                // `fib(1) = 0; // some comment` doesn't cause a false positive.
+                let trimmed_no_comment = if let Some(idx) = trimmed.find("//") {
+                    trimmed[..idx].trim_end()
+                } else {
+                    trimmed
+                };
+                let norm_line = uppercase_keywords_for_bnf(trimmed_no_comment);
+                let mut matched = false;
+                for rule in top_level_rules {
+                    if parse_as(&bnf_parser, norm_line.trim(), rule).is_some() {
+                        matched = true;
+                        break;
+                    }
+                }
+                if !matched {
+                    eprintln!(
+                        "  hint (line {}): unrecognised grammar construct: {:?}",
+                        lineno + 1,
+                        if trimmed.len() > 80 { &trimmed[..80] } else { trimmed }
+                    );
+                    break 'outer;
+                }
+            }
+            // Fall through — try lowering anyway.
        }

        // ── --emit-tree: print parse tree and stop ────────────────────────────
@@ -1,9 +1,16 @@
 <wsc> ::= ' ' | '	' | '
-' | '
'
+' | '
+'
 <ws> ::= <wsc> | <wsc> <ws>
 <opt-ws> ::= <ws> | ''

 <program> ::= <opt-ws> <program-body> <opt-ws>
+            | <opt-ws> <free-directive> <opt-ws> <program-body> <opt-ws>
+            | <opt-ws> <free-directive> <opt-ws>
+
+<free-directive> ::= '**FREE'
+                   | '**free'
+                   | '**Free'

 <program-body> ::= <declaration-section> <opt-ws> <procedure-list>
                 | <declaration-section>
@@ -24,7 +31,9 @@
                   | <procedure>

 <procedure> ::= 'DCL-PROC' <ws> <identifier> <opt-ws> ';' <opt-ws> <procedure-body> <opt-ws> 'END-PROC' <opt-ws> ';'
+              | 'DCL-PROC' <ws> <identifier> <opt-ws> ';' <opt-ws> <procedure-body> <opt-ws> 'END-PROC' <ws> <identifier> <opt-ws> ';'
              | 'DCL-PROC' <ws> <identifier> <ws> <proc-keyword-list> <opt-ws> ';' <opt-ws> <procedure-body> <opt-ws> 'END-PROC' <opt-ws> ';'
+              | 'DCL-PROC' <ws> <identifier> <ws> <proc-keyword-list> <opt-ws> ';' <opt-ws> <procedure-body> <opt-ws> 'END-PROC' <ws> <identifier> <opt-ws> ';'

 <proc-keyword-list> ::= <proc-keyword> <ws> <proc-keyword-list>
                      | <proc-keyword>