Files
langrpg/src/ast.rs
2026-03-12 21:41:30 -07:00

652 lines
24 KiB
Rust

//! ast.rs — Typed Abstract Syntax Tree for RPG IV free-format programs.
//!
//! This module defines the in-memory representation produced by the lowering
//! pass (`lower.rs`) and consumed by the LLVM code-generator (`codegen.rs`).
//!
//! Only the subset of the language that is needed to compile `hello.rpg` (and
//! small programs like it) is fully fleshed out. Everything else is kept as
//! placeholder variants so the lowering pass can represent the whole parse tree
//! without panicking, and the codegen can skip unimplemented nodes gracefully.
// ─────────────────────────────────────────────────────────────────────────────
// Top-level program
// ─────────────────────────────────────────────────────────────────────────────
/// A complete RPG IV source file.
///
/// The file is split into two lists: everything declared at file level and
/// the procedure definitions. Each list keeps its elements in a `Vec`.
#[derive(Debug, Clone)]
pub struct Program {
/// Zero or more top-level declarations (CTL-OPT, DCL-S, DCL-C, DCL-DS,
/// file declarations, subroutines …).
pub declarations: Vec<Declaration>,
/// Zero or more procedure definitions (`DCL-PROC … END-PROC`).
pub procedures: Vec<Procedure>,
}
// ─────────────────────────────────────────────────────────────────────────────
// Declarations
// ─────────────────────────────────────────────────────────────────────────────
/// A single declaration.
///
/// Used both for file-level declarations (`Program::declarations`) and for
/// declarations local to a procedure (`Procedure::locals`).
#[derive(Debug, Clone)]
pub enum Declaration {
/// `CTL-OPT keyword-list;`
ControlSpec(ControlSpec),
/// `DCL-S name type [keywords];`
Standalone(StandaloneDecl),
/// `DCL-C name literal;` or `DCL-C name CONST(literal);`
Constant(ConstantDecl),
/// `DCL-C name *named-constant;`
NamedConstantDecl(NamedConstantDecl),
/// `DCL-DS name … END-DS;`
DataStructure(DataStructureDecl),
/// `DCL-F name …;`
File(FileDecl),
/// `BEG-SR name; … END-SR;`
Subroutine(Subroutine),
}
// ── Control spec ──────────────────────────────────────────────────────────────
/// A control specification (`CTL-OPT keyword-list;`).
#[derive(Debug, Clone)]
pub struct ControlSpec {
/// The keywords listed on the specification.
pub keywords: Vec<CtlKeyword>,
}
/// One keyword of a control specification.
#[derive(Debug, Clone)]
pub enum CtlKeyword {
/// Presumably the `DFTACTGRP` keyword; the flag carries its `*YES` / `*NO`
/// argument. NOTE(review): which value maps to `true` is decided by the
/// lowering pass — confirm there.
DftActGrp(bool), // *YES / *NO
/// Presumably the `NOMAIN` keyword (no argument is stored).
NoMain,
/// Presumably `MAIN(name)`; the string would be the named procedure —
/// TODO confirm against the lowering pass.
Main(String),
/// Any other keyword, kept as text.
Other(String), // catch-all for keywords we don't generate code for
}
// ── Standalone variable ───────────────────────────────────────────────────────
/// A standalone variable declaration (`DCL-S name type [keywords];`).
#[derive(Debug, Clone)]
pub struct StandaloneDecl {
/// Name of the variable.
pub name: String,
/// Declared data type.
pub ty: TypeSpec,
/// Keywords attached to the declaration (initialisation, `STATIC`, …).
pub keywords: Vec<VarKeyword>,
}
// ── Constant declaration ──────────────────────────────────────────────────────
/// A constant whose value is a literal (`DCL-C name literal;` or
/// `DCL-C name CONST(literal);`).
#[derive(Debug, Clone)]
pub struct ConstantDecl {
/// Name of the constant.
pub name: String,
/// The literal value bound to the name.
pub value: Literal,
}
/// A constant whose value is a named constant such as `*ON` or `*BLANK`
/// (`DCL-C name *named-constant;`).
#[derive(Debug, Clone)]
pub struct NamedConstantDecl {
/// Name of the constant.
pub name: String,
/// The named constant bound to the name.
pub value: NamedConstant,
}
// ── Data structure ────────────────────────────────────────────────────────────
/// A data structure declaration (`DCL-DS name … END-DS;`).
#[derive(Debug, Clone)]
pub struct DataStructureDecl {
/// Name of the data structure.
pub name: String,
/// Keywords given on the data structure itself.
pub keywords: Vec<DsKeyword>,
/// The subfields, stored as a list.
pub fields: Vec<DsField>,
}
/// A keyword attached to a data structure declaration.
#[derive(Debug, Clone)]
pub enum DsKeyword {
/// Presumably the `QUALIFIED` keyword.
Qualified,
/// Presumably the `TEMPLATE` keyword.
Template,
/// Any other keyword, kept as text.
Other(String),
}
/// One subfield of a data structure.
#[derive(Debug, Clone)]
pub struct DsField {
/// Name of the subfield.
pub name: String,
/// Declared data type of the subfield.
pub ty: TypeSpec,
/// Keywords attached to the subfield (same keyword set as standalone
/// variables).
pub keywords: Vec<VarKeyword>,
}
// ── File declaration ──────────────────────────────────────────────────────────
/// A file declaration (`DCL-F name …;`).
#[derive(Debug, Clone)]
pub struct FileDecl {
/// Name of the file.
pub name: String,
/// The declaration's keywords, kept as plain strings rather than a typed
/// enum because no code is generated from them.
pub keywords: Vec<String>, // simplified — not code-gen'd
}
// ── Subroutine ────────────────────────────────────────────────────────────────
/// A subroutine (`BEG-SR name; … END-SR;`).
#[derive(Debug, Clone)]
pub struct Subroutine {
/// Name of the subroutine (the name an `EXSR name;` statement refers to).
pub name: String,
/// The statements between `BEG-SR` and `END-SR`.
pub body: Vec<Statement>,
}
// ─────────────────────────────────────────────────────────────────────────────
// Type specifications
// ─────────────────────────────────────────────────────────────────────────────
/// The data type of a variable, subfield, parameter or return value.
///
/// Length and precision arguments are stored as unevaluated `Expression`s,
/// so they are not necessarily integer literals.
#[derive(Debug, Clone, PartialEq)]
pub enum TypeSpec {
/// `CHAR(n)` — fixed-length character field.
Char(Box<Expression>),
/// `VARCHAR(n)` — variable-length character.
VarChar(Box<Expression>),
/// `INT(n)` — signed integer (n = 3, 5, 10, or 20).
Int(Box<Expression>),
/// `UNS(n)` — unsigned integer.
Uns(Box<Expression>),
/// `FLOAT(n)` — floating-point.
Float(Box<Expression>),
/// `PACKED(digits:decimals)`
Packed(Box<Expression>, Box<Expression>),
/// `ZONED(digits:decimals)`
Zoned(Box<Expression>, Box<Expression>),
/// `BINDEC(digits:decimals)`
Bindec(Box<Expression>, Box<Expression>),
/// `IND` — indicator (boolean).
Ind,
/// `DATE [(*fmt)]` — the optional format is not stored in this variant.
Date,
/// `TIME [(*fmt)]` — the optional format is not stored in this variant.
Time,
/// `TIMESTAMP`
Timestamp,
/// `POINTER`
Pointer,
/// `LIKE(name)`
Like(String),
/// `LIKEDS(name)`
LikeDs(String),
/// Unrecognised / not yet implemented type.
Unknown(String),
}
impl TypeSpec {
/// Return the number of bytes this type occupies at runtime on a 64-bit
/// Linux host. Returns `None` for types whose size is not statically known.
pub fn byte_size(&self) -> Option<u64> {
match self {
TypeSpec::Char(expr) | TypeSpec::VarChar(expr) => {
if let Expression::Literal(Literal::Integer(n)) = expr.as_ref() {
Some(*n as u64)
} else {
None
}
}
TypeSpec::Int(expr) | TypeSpec::Uns(expr) => {
if let Expression::Literal(Literal::Integer(n)) = expr.as_ref() {
Some(match n {
3 => 1,
5 => 2,
10 => 4,
20 => 8,
_ => 8, // default to 8 bytes
})
} else {
None
}
}
TypeSpec::Float(expr) => {
if let Expression::Literal(Literal::Integer(n)) = expr.as_ref() {
Some(if *n <= 4 { 4 } else { 8 })
} else {
None
}
}
TypeSpec::Ind => Some(1),
TypeSpec::Pointer => Some(8),
TypeSpec::Packed(digits, _) => {
if let Expression::Literal(Literal::Integer(n)) = digits.as_ref() {
Some((*n as u64 / 2) + 1)
} else {
None
}
}
_ => None,
}
}
}
// ─────────────────────────────────────────────────────────────────────────────
// Variable / declaration keywords
// ─────────────────────────────────────────────────────────────────────────────
/// A keyword attached to a standalone variable or data structure subfield.
#[derive(Debug, Clone)]
pub enum VarKeyword {
/// `INZ` — default initialisation.
Inz,
/// `INZ(expr)` — explicit initialisation value.
InzExpr(Expression),
/// `INZ(*named-constant)` — initialise to named constant.
InzNamed(NamedConstant),
/// Presumably the `STATIC` keyword.
Static,
/// Any other keyword, kept as text.
Other(String),
}
// ─────────────────────────────────────────────────────────────────────────────
// Procedures
// ─────────────────────────────────────────────────────────────────────────────
/// A procedure definition (`DCL-PROC … END-PROC`).
#[derive(Debug, Clone)]
pub struct Procedure {
/// Name of the procedure.
pub name: String,
/// Export flag. NOTE(review): presumably set when the `EXPORT` keyword is
/// present — confirm against the lowering pass.
pub exported: bool,
/// The procedure interface, if the procedure declares one.
pub pi: Option<PiSpec>,
/// Local declarations (DCL-S, DCL-C, etc.) inside the procedure.
pub locals: Vec<Declaration>,
/// The executable statements of the procedure.
pub body: Vec<Statement>,
}
/// Procedure Interface specification (`DCL-PI … END-PI`).
#[derive(Debug, Clone)]
pub struct PiSpec {
/// Name given on the interface.
pub name: String,
/// Return type, or `None` when the interface declares no return value.
pub return_ty: Option<TypeSpec>,
/// The declared parameters, stored as a list.
pub params: Vec<PiParam>,
}
/// One parameter of a procedure interface.
#[derive(Debug, Clone)]
pub struct PiParam {
/// Name of the parameter.
pub name: String,
/// Declared data type of the parameter.
pub ty: TypeSpec,
/// Keywords attached to the parameter.
pub keywords: Vec<ParamKeyword>,
}
/// A keyword attached to a procedure interface parameter.
#[derive(Debug, Clone)]
pub enum ParamKeyword {
/// Presumably the `VALUE` keyword.
Value,
/// Presumably the `CONST` keyword.
Const,
/// Any other keyword, kept as text.
Other(String),
}
// ─────────────────────────────────────────────────────────────────────────────
// Statements
// ─────────────────────────────────────────────────────────────────────────────
/// One executable statement.
///
/// Statement lists appear in procedure and subroutine bodies and nest inside
/// the structured statements (`If`, loops, `Select`, `Monitor`).
#[derive(Debug, Clone)]
pub enum Statement {
/// `lvalue = expr;` or `EVAL lvalue = expr;`
Assign(AssignStmt),
/// `IF expr; … [ELSEIF …] [ELSE …] ENDIF;`
If(IfStmt),
/// `DOW expr; … ENDDO;`
DoWhile(DoWhileStmt),
/// `DOU expr; … ENDDO;`
DoUntil(DoUntilStmt),
/// `FOR i = start TO/DOWNTO end [BY step]; … ENDFOR;`
For(ForStmt),
/// `SELECT; WHEN … [OTHER …] ENDSL;`
Select(SelectStmt),
/// `MONITOR; … ON-ERROR … ENDMON;`
Monitor(MonitorStmt),
/// `CALLP name(args);` or bare procedure call `name(args);`
CallP(CallPStmt),
/// `RETURN [expr];`
Return(ReturnStmt),
/// `LEAVE;`
Leave,
/// `ITER;`
Iter,
/// `LEAVESR;`
LeaveSr,
/// `EXSR name;` — the payload is the subroutine name.
ExSr(String),
/// `DSPLY expr;`
Dsply(DsplyStmt),
/// `RESET lvalue;` / `RESET *ALL;`
Reset(ResetStmt),
/// `CLEAR lvalue;`
Clear(LValue),
/// Any I/O statement (READ, WRITE, CHAIN, etc.) — kept as opaque for now.
Io(IoStatement),
/// Catch-all for statements not yet lowered.
Unimplemented(String),
}
// ── Assignment ────────────────────────────────────────────────────────────────
/// An assignment (`lvalue = expr;` or `EVAL lvalue = expr;`).
#[derive(Debug, Clone)]
pub struct AssignStmt {
/// The location being assigned to.
pub target: LValue,
/// The expression whose value is assigned.
pub value: Expression,
}
// ── If / ElseIf / Else ────────────────────────────────────────────────────────
/// An `IF expr; … [ELSEIF …] [ELSE …] ENDIF;` statement.
#[derive(Debug, Clone)]
pub struct IfStmt {
/// Condition of the leading `IF`.
pub condition: Expression,
/// Statements of the `IF` branch.
pub then_body: Vec<Statement>,
/// The `ELSEIF` branches, stored as a list; empty when there are none.
pub elseifs: Vec<ElseIf>,
/// Statements of the `ELSE` branch, or `None` when there is no `ELSE`.
pub else_body: Option<Vec<Statement>>,
}
/// One `ELSEIF` branch of an [`IfStmt`].
#[derive(Debug, Clone)]
pub struct ElseIf {
/// Condition of this branch.
pub condition: Expression,
/// Statements of this branch.
pub body: Vec<Statement>,
}
// ── DOW loop ──────────────────────────────────────────────────────────────────
/// A `DOW expr; … ENDDO;` loop.
#[derive(Debug, Clone)]
pub struct DoWhileStmt {
/// The expression following `DOW`.
pub condition: Expression,
/// The statements between `DOW` and `ENDDO`.
pub body: Vec<Statement>,
}
// ── DOU loop ──────────────────────────────────────────────────────────────────
/// A `DOU expr; … ENDDO;` loop.
#[derive(Debug, Clone)]
pub struct DoUntilStmt {
/// The expression following `DOU`.
pub condition: Expression,
/// The statements between `DOU` and `ENDDO`.
pub body: Vec<Statement>,
}
// ── FOR loop ──────────────────────────────────────────────────────────────────
/// A `FOR i = start TO/DOWNTO end [BY step]; … ENDFOR;` loop.
#[derive(Debug, Clone)]
pub struct ForStmt {
/// Name of the loop variable (`i`).
pub var: String,
/// The `start` expression.
pub start: Expression,
/// The `end` expression following `TO` / `DOWNTO`.
pub limit: Expression,
/// The `BY step` expression, or `None` when `BY` is omitted.
pub step: Option<Expression>,
/// Direction flag. NOTE(review): presumably `true` for `DOWNTO` and
/// `false` for `TO` — confirm against the lowering pass.
pub downto: bool,
/// The statements of the loop body.
pub body: Vec<Statement>,
}
// ── SELECT / WHEN ─────────────────────────────────────────────────────────────
/// A `SELECT; WHEN … [OTHER …] ENDSL;` statement.
#[derive(Debug, Clone)]
pub struct SelectStmt {
/// The `WHEN` clauses, stored as a list.
pub whens: Vec<WhenClause>,
/// Statements of the `OTHER` clause, or `None` when it is absent.
pub other: Option<Vec<Statement>>,
}
/// One `WHEN` clause of a [`SelectStmt`].
#[derive(Debug, Clone)]
pub struct WhenClause {
/// The expression following `WHEN`.
pub condition: Expression,
/// Statements belonging to this clause.
pub body: Vec<Statement>,
}
// ── MONITOR ───────────────────────────────────────────────────────────────────
/// A `MONITOR; … ON-ERROR … ENDMON;` statement.
#[derive(Debug, Clone)]
pub struct MonitorStmt {
/// The statements between `MONITOR` and the first `ON-ERROR`.
pub body: Vec<Statement>,
/// The `ON-ERROR` handlers, stored as a list.
pub handlers: Vec<OnError>,
}
/// One `ON-ERROR` handler of a [`MonitorStmt`].
#[derive(Debug, Clone)]
pub struct OnError {
/// The error codes listed on this handler.
pub codes: Vec<ErrorCode>,
/// Statements of the handler.
pub body: Vec<Statement>,
}
/// An error code listed on an `ON-ERROR` handler.
#[derive(Debug, Clone)]
pub enum ErrorCode {
/// A numeric status code.
Integer(u32),
/// Presumably the `*PROGRAM` special value — TODO confirm.
Program,
/// Presumably the `*FILE` special value — TODO confirm.
File,
/// Presumably the `*ALL` special value — TODO confirm.
All,
}
// ── CALLP ─────────────────────────────────────────────────────────────────────
/// A procedure call statement (`CALLP name(args);` or bare `name(args);`).
#[derive(Debug, Clone)]
pub struct CallPStmt {
/// Name of the called procedure.
pub name: String,
/// The call arguments, stored as a list.
pub args: Vec<Arg>,
}
// ── RETURN ────────────────────────────────────────────────────────────────────
/// A `RETURN [expr];` statement.
#[derive(Debug, Clone)]
pub struct ReturnStmt {
/// The returned expression, or `None` for a bare `RETURN;`.
pub value: Option<Expression>,
}
// ── DSPLY ─────────────────────────────────────────────────────────────────────
/// A `DSPLY expr;` statement.
#[derive(Debug, Clone)]
pub struct DsplyStmt {
/// The expression to display.
pub expr: Expression,
/// Optional message queue identifier (two-operand form).
pub msg_q: Option<String>,
/// Optional response operand. NOTE(review): presumably the name of the
/// variable that receives the reply — confirm against the lowering pass.
pub response: Option<String>,
}
// ── RESET ─────────────────────────────────────────────────────────────────────
/// A `RESET` statement.
#[derive(Debug, Clone)]
pub enum ResetStmt {
/// `RESET lvalue;`
Target(LValue),
/// `RESET *ALL;`
All,
}
// ── I/O (opaque) ──────────────────────────────────────────────────────────────
/// An I/O statement, kept in opaque form.
///
/// Each variant records only the file, record or format name it refers to,
/// plus a key where the variant has one. Variant names mirror RPG operation
/// codes (READ, WRITE, CHAIN, …).
#[derive(Debug, Clone)]
pub enum IoStatement {
/// Operation on the named file.
Read { file: String },
/// Operation on the named file.
ReadP { file: String },
/// Operation on the named record.
Write { record: String },
/// Operation on the named record.
Update { record: String },
/// Keyed operation on the named file.
Delete { key: Expression, file: String },
/// Keyed operation on the named file.
Chain { key: Expression, file: String },
/// Positioning operation; the key may be an expression or a start/end marker.
SetLL { key: SetKey, file: String },
/// Positioning operation; the key may be an expression or a start/end marker.
SetGT { key: SetKey, file: String },
/// Operation on the named file.
Open { file: String },
/// Operation on the named file, or on all files when `file` is `None`.
Close { file: Option<String> }, // None = *ALL
/// Operation with an optional format name.
Except { format: Option<String> },
/// Operation on the named format.
ExFmt { format: String },
/// Operation on the named file.
Post { file: String },
/// Operation on the named file.
Feod { file: String },
/// Operation on the named file.
Unlock { file: String },
}
/// The key operand of `IoStatement::SetLL` / `IoStatement::SetGT`.
#[derive(Debug, Clone)]
pub enum SetKey {
/// An ordinary key expression.
Expr(Expression),
/// Presumably the `*START` special value — TODO confirm.
Start,
/// Presumably the `*END` special value — TODO confirm.
End,
}
// ─────────────────────────────────────────────────────────────────────────────
// L-values
// ─────────────────────────────────────────────────────────────────────────────
/// An assignable location.
///
/// Used as the target of assignments and of `RESET` / `CLEAR` statements.
#[derive(Debug, Clone, PartialEq)]
pub enum LValue {
/// Simple or dotted name: `myVar` or `ds.field`.
Name(QualifiedName),
/// Array element: `arr(i)`. The second field holds the subscript
/// expression(s).
Index(QualifiedName, Vec<Expression>),
}
impl LValue {
    /// Return the base name (first component of the qualified name).
    ///
    /// For `ds.field` this is `ds`; for an indexed target it is the first
    /// component of the indexed name. If the qualified name has no components
    /// at all, the empty string is returned instead of panicking, matching the
    /// fallback used by `QualifiedName::leaf`.
    pub fn base_name(&self) -> &str {
        let qualified = match self {
            LValue::Name(q) | LValue::Index(q, _) => q,
        };
        // `first()` instead of `[0]`: an empty `parts` must not abort the compiler.
        qualified.parts.first().map_or("", String::as_str)
    }
}
// ─────────────────────────────────────────────────────────────────────────────
// Expressions
// ─────────────────────────────────────────────────────────────────────────────
/// An expression tree.
///
/// Sub-expressions are boxed so the type can be recursive.
#[derive(Debug, Clone, PartialEq)]
pub enum Expression {
/// A literal value.
Literal(Literal),
/// A named constant such as `*ON` or `*BLANK`.
Named(NamedConstant),
/// A special value such as `*IN` or `*START`.
Special(SpecialValue),
/// A reference to a (possibly dotted) name.
Variable(QualifiedName),
/// Array / function-style subscript: `name(idx)`.
Index(QualifiedName, Vec<Expression>),
/// Procedure / built-in call as expression: `name(args)`.
Call(String, Vec<Arg>),
/// A `%BUILTIN(…)` function call.
BuiltIn(BuiltIn),
/// Unary minus applied to the operand.
UnaryMinus(Box<Expression>),
/// Unary plus applied to the operand.
UnaryPlus(Box<Expression>),
/// Binary operation: operator, left operand, right operand.
/// NOTE(review): operand order is assumed to be source order — confirm.
BinOp(BinOp, Box<Expression>, Box<Expression>),
/// Logical negation of the operand.
Not(Box<Expression>),
/// A parenthesised sub-expression, kept as its own node.
Paren(Box<Expression>),
}
/// The operator of an `Expression::BinOp` node.
///
/// The enum has no payload, so it is `Copy` (operators can be passed and
/// matched by value) and `Hash` (operators can key a map or set).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum BinOp {
    // Arithmetic operators.
    Add,
    Sub,
    Mul,
    Div,
    Pow,
    // Comparison operators.
    Eq,
    Ne,
    Lt,
    Le,
    Gt,
    Ge,
    // Logical operators.
    And,
    Or,
}
// ─────────────────────────────────────────────────────────────────────────────
// Literals
// ─────────────────────────────────────────────────────────────────────────────
/// A literal value appearing in the source.
#[derive(Debug, Clone, PartialEq)]
pub enum Literal {
/// A character string literal.
String(String),
/// An integer literal, stored as a signed 64-bit value.
Integer(i64),
/// A floating-point literal, stored as a 64-bit float.
Float(f64),
/// A hexadecimal literal, stored as raw bytes.
Hex(Vec<u8>),
/// `*ON` / `*OFF` as a literal.
Indicator(bool),
}
// ─────────────────────────────────────────────────────────────────────────────
// Named constants (`*ON`, `*OFF`, `*BLANK`, …)
// ─────────────────────────────────────────────────────────────────────────────
/// A named constant (`*ON`, `*OFF`, `*BLANK`, …).
///
/// The enum has no payload, so it is `Copy` (values can be passed and matched
/// by value) and `Hash` (values can key a map or set).
///
/// Singular and plural spellings are kept as distinct variants.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum NamedConstant {
    On,
    Off,
    Blank,
    Blanks,
    Zero,
    Zeros,
    HiVal,
    LoVal,
    Null,
}
// ─────────────────────────────────────────────────────────────────────────────
// Special values (`*IN`, `*START`, …)
// ─────────────────────────────────────────────────────────────────────────────
/// A special value (`*IN`, `*START`, …) used as an expression.
///
/// Several variants share their names with `NamedConstant` variants
/// (`On`, `Off`, `Blank`, …); the two enums are separate types.
#[derive(Debug, Clone, PartialEq)]
pub enum SpecialValue {
/// `*IN(n)` — indicator by number.
In(Box<Expression>),
/// Presumably the whole `*IN` indicator array — TODO confirm.
InAll,
On,
Off,
Blank,
Blanks,
Zero,
Zeros,
HiVal,
LoVal,
Null,
/// `*ALL'string'`
All(String),
/// Presumably `*OMIT` — TODO confirm.
Omit,
This,
Same,
/// Presumably `*START` — TODO confirm.
Start,
/// Presumably `*END` — TODO confirm.
End,
}
// ─────────────────────────────────────────────────────────────────────────────
// Built-in functions
// ─────────────────────────────────────────────────────────────────────────────
/// The RPG IV `%BUILTIN(…)` functions we actually lower to code.
/// All others are wrapped in `Other`.
///
/// Arguments are stored as boxed expressions in the order shown in each
/// variant's signature.
#[derive(Debug, Clone, PartialEq)]
pub enum BuiltIn {
/// `%LEN(identifier)` — byte length of a field.
Len(Box<Expression>),
/// `%TRIM(expr)` — trim leading and trailing blanks.
Trim(Box<Expression>),
/// `%TRIML(expr)` — trim leading blanks.
TrimL(Box<Expression>),
/// `%TRIMR(expr)` — trim trailing blanks.
TrimR(Box<Expression>),
/// `%CHAR(expr)` — convert to character string.
Char(Box<Expression>),
/// `%INT(expr)` — convert to integer.
Int(Box<Expression>),
/// `%DEC(expr:digits:decimals)` — convert to packed decimal.
Dec(Box<Expression>, Box<Expression>, Box<Expression>),
/// `%ABS(expr)` — absolute value.
Abs(Box<Expression>),
/// `%SQRT(expr)` — square root.
Sqrt(Box<Expression>),
/// `%EOF[(file)]` — `None` when no file name is given.
Eof(Option<String>),
/// `%FOUND[(file)]` — `None` when no file name is given.
Found(Option<String>),
/// `%ERROR()`
Error,
/// `%SUBST(str:start:len)` or `%SUBST(str:start)`.
Subst(Box<Expression>, Box<Expression>, Option<Box<Expression>>),
/// `%SCAN(pattern:source[:start])`.
Scan(Box<Expression>, Box<Expression>, Option<Box<Expression>>),
/// `%SIZE(identifier)`.
Size(Box<Expression>),
/// `%ADDR(identifier)`.
Addr(Box<Expression>),
/// `%ALLOC(size)`.
Alloc(Box<Expression>),
/// `%REM(a:b)`.
Rem(Box<Expression>, Box<Expression>),
/// `%DIV(a:b)`.
Div(Box<Expression>, Box<Expression>),
/// Any built-in we haven't individually modelled: its name and its
/// argument expressions.
Other(String, Vec<Expression>),
}
// ─────────────────────────────────────────────────────────────────────────────
// Qualified names and argument lists
// ─────────────────────────────────────────────────────────────────────────────
/// A name made of one or more components, written with dots between them
/// in source form (`ds.subDs.leaf`).
#[derive(Debug, Clone, PartialEq)]
pub struct QualifiedName {
    /// The components, outermost first.
    pub parts: Vec<String>,
}

impl QualifiedName {
    /// Build a name consisting of the single component `name`.
    pub fn simple(name: impl Into<String>) -> Self {
        let only: String = name.into();
        Self { parts: vec![only] }
    }

    /// `true` when the name has exactly one component.
    pub fn is_simple(&self) -> bool {
        matches!(self.parts.as_slice(), [_])
    }

    /// Return the leaf (last) component, or `""` when there are no components.
    pub fn leaf(&self) -> &str {
        match self.parts.last() {
            Some(component) => component.as_str(),
            None => "",
        }
    }
}

impl std::fmt::Display for QualifiedName {
    /// Write the components separated by `.`; an empty name writes nothing.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut components = self.parts.iter();
        if let Some(head) = components.next() {
            f.write_str(head)?;
        }
        for component in components {
            f.write_str(".")?;
            f.write_str(component)?;
        }
        Ok(())
    }
}
/// A call argument.
#[derive(Debug, Clone, PartialEq)]
pub enum Arg {
/// An ordinary argument expression.
Expr(Expression),
/// An omitted argument. NOTE(review): presumably written `*OMIT` in
/// source — confirm against the lowering pass.
Omit,
}