From 8e36afbf676d3a829738064c5cadef0f36f0fec5 Mon Sep 17 00:00:00 2001 From: charles Date: Thu, 12 Mar 2026 23:08:53 -0700 Subject: [PATCH] add: dfs test, and code fixes --- samples/dfs.rpg | 68 +++++++++ samples/dfs.rpg.stdout | 9 ++ src/ast.rs | 5 +- src/codegen.rs | 316 ++++++++++++++++++++++++++++++++--------- src/lower.rs | 15 +- 5 files changed, 341 insertions(+), 72 deletions(-) create mode 100644 samples/dfs.rpg create mode 100644 samples/dfs.rpg.stdout diff --git a/samples/dfs.rpg b/samples/dfs.rpg new file mode 100644 index 0000000..39ee03d --- /dev/null +++ b/samples/dfs.rpg @@ -0,0 +1,68 @@ +**Free +Ctl-Opt DftActGrp(*No) Main(MainLine); + +// -------------------------------------------------- +// Global Graph Data (15 Nodes) +// -------------------------------------------------- +Dcl-S AdjMatrix Ind Dim(15: 15) Inz(*Off); +Dcl-S Visited Ind Dim(15) Inz(*Off); +Dcl-S Found Ind Inz(*Off); + +Dcl-Proc MainLine; + // 1. Setup a simple graph (Node 1 connected to 2 & 3, etc.) + AdjMatrix(1: 2) = *On; AdjMatrix(2: 4) = *On; AdjMatrix(4: 8) = *On; + AdjMatrix(1: 3) = *On; AdjMatrix(3: 5) = *On; AdjMatrix(5: 9) = *On; // Path to 9 + AdjMatrix(3: 6) = *On; AdjMatrix(6: 10) = *On; + + Dsply 'Starting DFS to find Node 9...'; + + // 2. Start Search from Node 1 + DFS(1: 9); + + If Not Found; + Dsply 'Node 9 was not found.'; + EndIf; + + Return; +End-Proc; + +// -------------------------------------------------- +// Recursive DFS Subprocedure +// -------------------------------------------------- +Dcl-Proc DFS; + Dcl-Pi *N; + CurrentNode Int(10) Value; + TargetNode Int(10) Value; + End-Pi; + + Dcl-S Neighbor Int(10); + + // If already found elsewhere, stop exploring + If Found; + Return; + EndIf; + + // Mark and Print current step + Visited(CurrentNode) = *On; + Dsply ('Visiting: ' + %Char(CurrentNode)); + + // Check if this is our target + If CurrentNode = TargetNode; + Dsply '*** MATCH FOUND! 
***'; + Found = *On; + Return; + EndIf; + + // Explore Neighbors (1 to 15) + For Neighbor = 1 to 15; + If AdjMatrix(CurrentNode: Neighbor) And Not Visited(Neighbor); + DFS(Neighbor: TargetNode); + + // If the recursive call found it, stop looping here too + If Found; + Return; + EndIf; + EndIf; + EndFor; + +End-Proc; diff --git a/samples/dfs.rpg.stdout b/samples/dfs.rpg.stdout new file mode 100644 index 0000000..5b83bb2 --- /dev/null +++ b/samples/dfs.rpg.stdout @@ -0,0 +1,9 @@ +DSPLY Starting DFS to find Node 9... +DSPLY Visiting: 1 +DSPLY Visiting: 2 +DSPLY Visiting: 4 +DSPLY Visiting: 8 +DSPLY Visiting: 3 +DSPLY Visiting: 5 +DSPLY Visiting: 9 +DSPLY *** MATCH FOUND! *** diff --git a/src/ast.rs b/src/ast.rs index 51e6725..7d76ad6 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -220,11 +220,14 @@ pub enum VarKeyword { /// `INZ(*named-constant)` — initialise to named constant. InzNamed(NamedConstant), Static, - /// `DIM(n)` — declares the variable as an array with `n` elements. + /// `DIM(n)` — declares the variable as a 1-D array with `n` elements. Dim(Expression), + /// `DIM(rows: cols)` — declares the variable as a 2-D array. 
+ Dim2(Expression, Expression), Other(String), } + // ───────────────────────────────────────────────────────────────────────────── // Procedures // ───────────────────────────────────────────────────────────────────────────── diff --git a/src/codegen.rs b/src/codegen.rs index 3825d57..e9cb541 100644 --- a/src/codegen.rs +++ b/src/codegen.rs @@ -78,6 +78,7 @@ pub fn compile_to_object( builder, globals: HashMap::new(), array_dims: HashMap::new(), + array_cols: HashMap::new(), string_cache: HashMap::new(), global_inits: Vec::new(), }; @@ -141,6 +142,7 @@ pub fn emit_ir(program: &Program) -> Result { builder, globals: HashMap::new(), array_dims: HashMap::new(), + array_cols: HashMap::new(), string_cache: HashMap::new(), global_inits: Vec::new(), }; @@ -182,9 +184,14 @@ struct Codegen<'ctx> { builder: Builder<'ctx>, /// Module-scope global variables name -> (alloca/global ptr, TypeSpec) globals: HashMap<String, (PointerValue<'ctx>, TypeSpec)>, - /// Array dimension table: variable name -> number of elements. - /// Populated when a `DIM(n)` keyword is encountered. + /// Array dimension table: variable name -> total number of elements. + /// Populated when a `DIM(n)` or `DIM(rows:cols)` keyword is encountered. array_dims: HashMap<String, u64>, + /// Column count for 2-D arrays: variable name -> number of columns. + /// Only present for variables declared with `DIM(rows: cols)`. + /// Used to convert a (row, col) subscript into a flat 0-based index: + /// flat = (row - 1) * cols + (col - 1) + array_cols: HashMap<String, u64>, /// Interned string literal globals (content -> global ptr). string_cache: HashMap<String, PointerValue<'ctx>>, /// Global declarations that need runtime initialisation (INZ with a value). @@ -252,7 +259,23 @@ impl<'ctx> Codegen<'ctx> { } }); - // Generate each procedure. + // ── Pass 1: forward-declare all procedure signatures ────────────────── + // This ensures that any procedure can call any other procedure regardless + // of source order (e.g. mainline calling dfs before dfs is defined). 
+ for proc in &program.procedures { + let fn_name = if proc.exported { + format!("rpg_{}", proc.name) + } else { + proc.name.clone() + }; + // Only declare if not already in the module (runtime fns, etc.). + if self.module.get_function(&fn_name).is_none() { + let fn_ty = self.build_proc_fn_type(proc); + self.module.add_function(&fn_name, fn_ty, None); + } + } + + // ── Pass 2: emit procedure bodies ──────────────────────────────────── let mut exported_name: Option = None; for proc in &program.procedures { if proc.exported && exported_name.is_none() { @@ -332,8 +355,35 @@ impl<'ctx> Codegen<'ctx> { fn gen_global_decl(&mut self, decl: &Declaration) -> Result<(), CodegenError> { match decl { Declaration::Standalone(sd) => { - let size = sd.ty.byte_size().unwrap_or(8); - let arr_ty = self.context.i8_type().array_type(size as u32); + let elem_size = sd.ty.byte_size().unwrap_or(8); + + // Check for DIM(n) or DIM(rows:cols) keywords. + let dim1 = sd.keywords.iter().find_map(|kw| { + if let VarKeyword::Dim(expr) = kw { const_int_from_expr(expr) } else { None } + }); + let dim2 = sd.keywords.iter().find_map(|kw| { + if let VarKeyword::Dim2(r, c) = kw { + match (const_int_from_expr(r), const_int_from_expr(c)) { + (Some(rows), Some(cols)) => Some((rows, cols)), + _ => None, + } + } else { + None + } + }); + + let total_bytes = if let Some((rows, cols)) = dim2 { + self.array_dims.insert(sd.name.clone(), rows * cols); + self.array_cols.insert(sd.name.clone(), cols); + elem_size * rows * cols + } else if let Some(n) = dim1 { + self.array_dims.insert(sd.name.clone(), n); + elem_size * n + } else { + elem_size + }; + + let arr_ty = self.context.i8_type().array_type(total_bytes as u32); let global = self.module.add_global(arr_ty, Some(AddressSpace::default()), &sd.name); global.set_initializer(&arr_ty.const_zero()); let ptr = global.as_pointer_value(); @@ -375,12 +425,44 @@ impl<'ctx> Codegen<'ctx> { proc.name.clone() }; - let function = self.module.add_function(&fn_name, fn_ty, 
None); + // Re-use the forward declaration emitted in pass 1 rather than adding + // a duplicate function with the same name. + let function = self.module.get_function(&fn_name) + .unwrap_or_else(|| self.module.add_function(&fn_name, fn_ty, None)); + let entry_bb = self.context.append_basic_block(function, "entry"); self.builder.position_at_end(entry_bb); let mut state = FnState::new(function); + // ── Wire PI parameters into state.locals ────────────────────────────── + // Each incoming LLVM argument gets its own alloca slot so that the body + // can read (and write) the parameter by name just like any other local. + if let Some(pi) = &proc.pi { + for (i, param) in pi.params.iter().enumerate() { + if let Some(arg_val) = function.get_nth_param(i as u32) { + // Allocate a slot of the right size in the entry block. + let ptr = self.alloca_for_type(&param.ty, &param.name); + // Store the incoming argument value. + let i64_t = self.context.i64_type(); + match arg_val { + BasicValueEnum::IntValue(iv) => { + // Extend/truncate to i64, then store at the right width. + let extended = self.builder + .build_int_s_extend_or_bit_cast(iv, i64_t, "param_ext") + .unwrap_or(iv); + self.store_value(ptr, extended.into(), &param.ty); + } + other => { + // Pointer / float — store as-is. + self.store_value(ptr, other, &param.ty); + } + } + state.locals.insert(param.name.clone(), (ptr, param.ty.clone())); + } + } + } + // Allocate locals for DCL-S inside the proc. for decl in &proc.locals { self.gen_local_decl(decl, &mut state)?; @@ -431,25 +513,40 @@ impl<'ctx> Codegen<'ctx> { fn gen_local_decl(&mut self, decl: &Declaration, state: &mut FnState<'ctx>) -> Result<(), CodegenError> { match decl { Declaration::Standalone(sd) => { - // Check if a DIM(n) keyword is present — if so we allocate a - // contiguous block of `n * elem_size` bytes. - let dim = sd.keywords.iter().find_map(|kw| { + // Check for DIM(n) or DIM(rows:cols) keywords. 
+ let dim1 = sd.keywords.iter().find_map(|kw| { if let VarKeyword::Dim(expr) = kw { const_int_from_expr(expr) } else { None } }); + let dim2 = sd.keywords.iter().find_map(|kw| { + if let VarKeyword::Dim2(r, c) = kw { + match (const_int_from_expr(r), const_int_from_expr(c)) { + (Some(rows), Some(cols)) => Some((rows, cols)), + _ => None, + } + } else { + None + } + }); - let ptr = if let Some(n) = dim { - // Array: allocate n elements of the element type. + let ptr = if let Some((rows, cols)) = dim2 { + // 2-D array: allocate rows*cols elements of the element type. + self.alloca_for_type_dim(&sd.ty, &sd.name, rows * cols) + } else if let Some(n) = dim1 { + // 1-D array: allocate n elements of the element type. self.alloca_for_type_dim(&sd.ty, &sd.name, n) } else { self.alloca_for_type(&sd.ty, &sd.name) }; - // Record the dimension so %Elem and indexing can use it. - if let Some(n) = dim { + // Record the dimension(s) so %Elem and indexing can use them. + if let Some((rows, cols)) = dim2 { + self.array_dims.insert(sd.name.clone(), rows * cols); + self.array_cols.insert(sd.name.clone(), cols); + } else if let Some(n) = dim1 { self.array_dims.insert(sd.name.clone(), n); } @@ -555,6 +652,81 @@ impl<'ctx> Codegen<'ctx> { } } + /// Load a single element of type `ty` from `elem_ptr`, returning it sign-/zero-extended + /// to i64. Handles `Ind` (stored as i8, returned as i64 0/1), `Int`/`Uns` (sign-extended), + /// and falls back to returning the pointer itself for `Char` and other pointer-like types. + fn load_array_elem( + &self, + elem_ptr: PointerValue<'ctx>, + ty: &TypeSpec, + ) -> Result<BasicValueEnum<'ctx>, CodegenError> { + let i64_t = self.context.i64_type(); + match ty { + TypeSpec::Ind => { + // Stored as i8 (1 = *On, 0 = *Off). 
+ let i8_t = self.context.i8_type(); + let cast_ptr = self.builder.build_pointer_cast( + elem_ptr, + self.context.ptr_type(inkwell::AddressSpace::default()), + "ind_ptr_cast", + ).unwrap_or(elem_ptr); + if let Ok(v) = self.builder.build_load(i8_t, cast_ptr, "ind_elem") { + let iv = v.into_int_value(); + let ext = self.builder.build_int_z_extend(iv, i64_t, "ind_ext").unwrap_or(iv); + return Ok(ext.into()); + } + Ok(i64_t.const_zero().into()) + } + TypeSpec::Int(_) | TypeSpec::Uns(_) => { + let bytes = ty.byte_size().unwrap_or(8); + let int_ty = self.context.custom_width_int_type((bytes * 8) as u32); + let cast_ptr = self.builder.build_pointer_cast( + elem_ptr, + self.context.ptr_type(inkwell::AddressSpace::default()), + "int_ptr_cast", + ).unwrap_or(elem_ptr); + if let Ok(v) = self.builder.build_load(int_ty, cast_ptr, "int_elem") { + let iv = v.into_int_value(); + let ext = self.builder.build_int_s_extend(iv, i64_t, "int_ext").unwrap_or(iv); + return Ok(ext.into()); + } + Ok(i64_t.const_zero().into()) + } + _ => { + // Char / pointer-like — return the pointer itself. + Ok(elem_ptr.into()) + } + } + } + + /// Return a pointer to element `(row, col)` (both 1-based, RPG convention) + /// of a 2-D array stored in row-major order. 
+ /// + /// flat index = (row - 1) * cols + (col - 1) + fn array_elem_ptr_2d( + &self, + base_ptr: PointerValue<'ctx>, + row: inkwell::values::IntValue<'ctx>, + col: inkwell::values::IntValue<'ctx>, + cols: u64, + elem_size: u64, + ) -> PointerValue<'ctx> { + let i64_t = self.context.i64_type(); + let one = i64_t.const_int(1, false); + let cols_val = i64_t.const_int(cols, false); + let row0 = self.builder.build_int_sub(row, one, "row0").unwrap(); + let col0 = self.builder.build_int_sub(col, one, "col0").unwrap(); + let row_off = self.builder.build_int_mul(row0, cols_val, "row_off").unwrap(); + let flat = self.builder.build_int_add(row_off, col0, "flat").unwrap(); + let elem_bytes = i64_t.const_int(elem_size, false); + let byte_off = self.builder.build_int_mul(flat, elem_bytes, "byte_off2d").unwrap(); + unsafe { + self.builder + .build_gep(self.context.i8_type(), base_ptr, &[byte_off], "elem_ptr_2d") + .unwrap() + } + } + fn zero_init_var(&self, ptr: PointerValue<'ctx>, ty: &TypeSpec) -> Result<(), CodegenError> { let size = ty.byte_size().unwrap_or(0); if size == 0 { return Ok(()); } @@ -952,8 +1124,16 @@ impl<'ctx> Codegen<'ctx> { // If the LValue has an index (array assignment), compute the element pointer. let dest_ptr = if let LValue::Index(_, indices) = &a.target { - if let Some(idx_expr) = indices.first() { - let elem_size = ty.byte_size().unwrap_or(8); + let elem_size = ty.byte_size().unwrap_or(8); + if indices.len() >= 2 { + // 2-D subscript: name(row: col) — look up the column stride. 
+ let cols = self.array_cols.get(name).copied().unwrap_or(1); + let row_val = self.gen_expression(&indices[0], state)?; + let col_val = self.gen_expression(&indices[1], state)?; + let row_i = self.coerce_to_i64(row_val); + let col_i = self.coerce_to_i64(col_val); + self.array_elem_ptr_2d(ptr, row_i, col_i, cols, elem_size) + } else if let Some(idx_expr) = indices.first() { let idx_val = self.gen_expression(idx_expr, state)?; let idx_i = self.coerce_to_i64(idx_val); self.array_elem_ptr(ptr, idx_i, elem_size) @@ -1323,8 +1503,6 @@ impl<'ctx> Codegen<'ctx> { Expression::Variable(qname) => { let name = qname.leaf(); if let Some((ptr, ty)) = self.resolve_var(name, state) { - let llvm_ty = self.type_spec_to_llvm(&ty) - .unwrap_or(BasicTypeEnum::IntType(i64_t)); match &ty { TypeSpec::Int(_) | TypeSpec::Uns(_) => { // Use byte_size() to get the real storage width — the @@ -1337,14 +1515,23 @@ impl<'ctx> Codegen<'ctx> { let ext = self.builder.build_int_s_extend(iv, i64_t, "sext").unwrap_or(iv); return Ok(ext.into()); } + Err(CodegenError::new(format!("could not load variable '{}'", name))) + } + TypeSpec::Ind => { + // Stored as i8 (1 = *On, 0 = *Off); zero-extend to i64. + let i8_t = self.context.i8_type(); + if let Ok(v) = self.builder.build_load(i8_t, ptr, name) { + let iv = v.into_int_value(); + let ext = self.builder.build_int_z_extend(iv, i64_t, "ind_zext").unwrap_or(iv); + return Ok(ext.into()); + } + Ok(i64_t.const_zero().into()) } _ => { - // For CHAR / other types, return the pointer itself. - return Ok(ptr.into()); + // For CHAR / pointer-like types, return the pointer itself. + Ok(ptr.into()) } } - let _ = llvm_ty; - Err(CodegenError::new(format!("could not load variable '{}'", name))) } else { // Return 0 for unknown variables. Ok(i64_t.const_zero().into()) @@ -1407,33 +1594,32 @@ impl<'ctx> Codegen<'ctx> { } // No function found — check if `name` is an array variable and - // the call is actually a subscript read: name(idx). 
+ // the call is actually a subscript read: name(idx) or name(row:col). if let Some((ptr, ty)) = self.resolve_var(name, state) { let elem_size = ty.byte_size().unwrap_or(8); - if let Some(Arg::Expr(idx_expr)) = args.first() { + + // Collect up to two Expr arguments. + let exprs: Vec<&Expression> = args.iter() + .filter_map(|a| if let Arg::Expr(e) = a { Some(e) } else { None }) + .collect(); + + let elem_ptr = if exprs.len() >= 2 { + // 2-D subscript: name(row: col) + let cols = self.array_cols.get(name).copied().unwrap_or(1); + let row_val = self.gen_expression(exprs[0], state)?; + let col_val = self.gen_expression(exprs[1], state)?; + let row_i = self.coerce_to_i64(row_val); + let col_i = self.coerce_to_i64(col_val); + self.array_elem_ptr_2d(ptr, row_i, col_i, cols, elem_size) + } else if let Some(idx_expr) = exprs.first() { let idx_val = self.gen_expression(idx_expr, state)?; let idx_i = self.coerce_to_i64(idx_val); - let elem_ptr = self.array_elem_ptr(ptr, idx_i, elem_size); - match &ty { - TypeSpec::Int(_) | TypeSpec::Uns(_) => { - let bytes = ty.byte_size().unwrap_or(8); - let int_ty = self.context.custom_width_int_type((bytes * 8) as u32); - let cast_ptr = self.builder.build_pointer_cast( - elem_ptr, - self.context.ptr_type(AddressSpace::default()), - "call_elem_ptr_cast", - ).unwrap_or(elem_ptr); - if let Ok(v) = self.builder.build_load(int_ty, cast_ptr, "call_elem") { - let iv = v.into_int_value(); - let ext = self.builder - .build_int_s_extend(iv, i64_t, "call_elem_ext") - .unwrap_or(iv); - return Ok(ext.into()); - } - } - _ => return Ok(elem_ptr.into()), - } - } + self.array_elem_ptr(ptr, idx_i, elem_size) + } else { + ptr + }; + + return self.load_array_elem(elem_ptr, &ty); } Ok(i64_t.const_zero().into()) @@ -1446,35 +1632,29 @@ impl<'ctx> Codegen<'ctx> { } Expression::Index(qname, indices) => { - // Array element read: name(i) — RPG uses 1-based indexing. + // Array element read: name(i) or name(row: col) — RPG uses 1-based indexing. 
let name = qname.leaf(); if let Some((ptr, ty)) = self.resolve_var(name, state) { let elem_size = ty.byte_size().unwrap_or(8); - if let Some(idx_expr) = indices.first() { + + let elem_ptr = if indices.len() >= 2 { + // 2-D subscript: name(row: col) + let cols = self.array_cols.get(name).copied().unwrap_or(1); + let row_val = self.gen_expression(&indices[0], state)?; + let col_val = self.gen_expression(&indices[1], state)?; + let row_i = self.coerce_to_i64(row_val); + let col_i = self.coerce_to_i64(col_val); + self.array_elem_ptr_2d(ptr, row_i, col_i, cols, elem_size) + } else if let Some(idx_expr) = indices.first() { let idx_val = self.gen_expression(idx_expr, state)?; let idx_i = self.coerce_to_i64(idx_val); - let elem_ptr = self.array_elem_ptr(ptr, idx_i, elem_size); - // Load the element with the element's integer type. - match &ty { - TypeSpec::Int(_) | TypeSpec::Uns(_) => { - let bytes = ty.byte_size().unwrap_or(8); - let int_ty = self.context.custom_width_int_type((bytes * 8) as u32); - let cast_ptr = self.builder.build_pointer_cast( - elem_ptr, - self.context.ptr_type(AddressSpace::default()), - "elem_ptr_cast", - ).unwrap_or(elem_ptr); - if let Ok(v) = self.builder.build_load(int_ty, cast_ptr, "elem") { - let iv = v.into_int_value(); - let ext = self.builder.build_int_s_extend(iv, i64_t, "sext").unwrap_or(iv); - return Ok(ext.into()); - } - } - _ => { - return Ok(elem_ptr.into()); - } - } - } + self.array_elem_ptr(ptr, idx_i, elem_size) + } else { + ptr + }; + + // Load the element with the appropriate type. 
+ return self.load_array_elem(elem_ptr, &ty); } Ok(i64_t.const_zero().into()) } diff --git a/src/lower.rs b/src/lower.rs index eee8ca6..4774b54 100644 --- a/src/lower.rs +++ b/src/lower.rs @@ -1352,9 +1352,18 @@ impl Parser { self.advance(); // KwDim if self.peek() == &Token::LParen { self.advance(); // ( - if let Ok(expr) = self.parse_expression() { - self.eat(&Token::RParen); - return VarKeyword::Dim(expr); + if let Ok(rows) = self.parse_expression() { + if self.eat(&Token::Colon) { + // DIM(rows: cols) — 2-D array + if let Ok(cols) = self.parse_expression() { + self.eat(&Token::RParen); + return VarKeyword::Dim2(rows, cols); + } + } else { + // DIM(n) — 1-D array + self.eat(&Token::RParen); + return VarKeyword::Dim(rows); + } } self.eat(&Token::RParen); }