add: compiler
This commit is contained in:
651
src/ast.rs
Normal file
651
src/ast.rs
Normal file
@@ -0,0 +1,651 @@
|
||||
//! ast.rs — Typed Abstract Syntax Tree for RPG IV free-format programs.
|
||||
//!
|
||||
//! This module defines the in-memory representation produced by the lowering
|
||||
//! pass (`lower.rs`) and consumed by the LLVM code-generator (`codegen.rs`).
|
||||
//!
|
||||
//! Only the subset of the language that is needed to compile `hello.rpg` (and
|
||||
//! small programs like it) is fully fleshed out. Everything else is kept as
|
||||
//! placeholder variants so the lowering pass can represent the whole parse tree
|
||||
//! without panicking, and the codegen can skip unimplemented nodes gracefully.
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// Top-level program
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/// A complete RPG IV source file.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Program {
|
||||
/// Zero or more top-level declarations (CTL-OPT, DCL-S, DCL-C, DCL-DS,
|
||||
/// file declarations, subroutines …).
|
||||
pub declarations: Vec<Declaration>,
|
||||
/// Zero or more procedure definitions (`DCL-PROC … END-PROC`).
|
||||
pub procedures: Vec<Procedure>,
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// Declarations
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum Declaration {
|
||||
/// `CTL-OPT keyword-list;`
|
||||
ControlSpec(ControlSpec),
|
||||
/// `DCL-S name type [keywords];`
|
||||
Standalone(StandaloneDecl),
|
||||
/// `DCL-C name literal;` or `DCL-C name CONST(literal);`
|
||||
Constant(ConstantDecl),
|
||||
/// `DCL-C name *named-constant;`
|
||||
NamedConstantDecl(NamedConstantDecl),
|
||||
/// `DCL-DS name … END-DS;`
|
||||
DataStructure(DataStructureDecl),
|
||||
/// `DCL-F name …;`
|
||||
File(FileDecl),
|
||||
/// `BEG-SR name; … END-SR;`
|
||||
Subroutine(Subroutine),
|
||||
}
|
||||
|
||||
// ── Control spec ──────────────────────────────────────────────────────────────
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct ControlSpec {
|
||||
pub keywords: Vec<CtlKeyword>,
|
||||
}
|
||||
|
||||
/// One keyword of a `CTL-OPT` control specification.
///
/// Derives `PartialEq`/`Eq` (like the expression-side AST types in this
/// module) so keywords can be compared directly, e.g. in tests.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum CtlKeyword {
    /// `DFTACTGRP(*YES)` / `DFTACTGRP(*NO)`; the flag is `true` for `*YES`.
    DftActGrp(bool),
    /// `NOMAIN`.
    NoMain,
    /// `MAIN(name)`; carries the name given in the parentheses.
    Main(String),
    /// Catch-all for keywords we don't generate code for.
    Other(String),
}
|
||||
|
||||
// ── Standalone variable ───────────────────────────────────────────────────────
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct StandaloneDecl {
|
||||
pub name: String,
|
||||
pub ty: TypeSpec,
|
||||
pub keywords: Vec<VarKeyword>,
|
||||
}
|
||||
|
||||
// ── Constant declaration ──────────────────────────────────────────────────────
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct ConstantDecl {
|
||||
pub name: String,
|
||||
pub value: Literal,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct NamedConstantDecl {
|
||||
pub name: String,
|
||||
pub value: NamedConstant,
|
||||
}
|
||||
|
||||
// ── Data structure ────────────────────────────────────────────────────────────
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct DataStructureDecl {
|
||||
pub name: String,
|
||||
pub keywords: Vec<DsKeyword>,
|
||||
pub fields: Vec<DsField>,
|
||||
}
|
||||
|
||||
/// A keyword attached to a data-structure declaration.
///
/// Derives `PartialEq`/`Eq` (like the expression-side AST types in this
/// module) so keywords can be compared and searched for directly.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum DsKeyword {
    /// `QUALIFIED`.
    Qualified,
    /// `TEMPLATE`.
    Template,
    /// Any other keyword, kept as text.
    Other(String),
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct DsField {
|
||||
pub name: String,
|
||||
pub ty: TypeSpec,
|
||||
pub keywords: Vec<VarKeyword>,
|
||||
}
|
||||
|
||||
// ── File declaration ──────────────────────────────────────────────────────────
|
||||
|
||||
/// Payload of [`Declaration::File`] (`DCL-F name …;`).
///
/// Derives `PartialEq`/`Eq` (like the expression-side AST types in this
/// module) so declarations can be compared directly.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FileDecl {
    /// Name of the file.
    pub name: String,
    /// File keywords kept as plain text (simplified — not code-gen'd).
    pub keywords: Vec<String>,
}
|
||||
|
||||
// ── Subroutine ────────────────────────────────────────────────────────────────
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Subroutine {
|
||||
pub name: String,
|
||||
pub body: Vec<Statement>,
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// Type specifications
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub enum TypeSpec {
|
||||
/// `CHAR(n)` — fixed-length character field.
|
||||
Char(Box<Expression>),
|
||||
/// `VARCHAR(n)` — variable-length character.
|
||||
VarChar(Box<Expression>),
|
||||
/// `INT(n)` — signed integer (n = 3, 5, 10, or 20).
|
||||
Int(Box<Expression>),
|
||||
/// `UNS(n)` — unsigned integer.
|
||||
Uns(Box<Expression>),
|
||||
/// `FLOAT(n)` — floating-point.
|
||||
Float(Box<Expression>),
|
||||
/// `PACKED(digits:decimals)`
|
||||
Packed(Box<Expression>, Box<Expression>),
|
||||
/// `ZONED(digits:decimals)`
|
||||
Zoned(Box<Expression>, Box<Expression>),
|
||||
/// `BINDEC(digits:decimals)`
|
||||
Bindec(Box<Expression>, Box<Expression>),
|
||||
/// `IND` — indicator (boolean).
|
||||
Ind,
|
||||
/// `DATE [(*fmt)]`
|
||||
Date,
|
||||
/// `TIME [(*fmt)]`
|
||||
Time,
|
||||
/// `TIMESTAMP`
|
||||
Timestamp,
|
||||
/// `POINTER`
|
||||
Pointer,
|
||||
/// `LIKE(name)`
|
||||
Like(String),
|
||||
/// `LIKEDS(name)`
|
||||
LikeDs(String),
|
||||
/// Unrecognised / not yet implemented type.
|
||||
Unknown(String),
|
||||
}
|
||||
|
||||
impl TypeSpec {
|
||||
/// Return the number of bytes this type occupies at runtime on a 64-bit
|
||||
/// Linux host. Returns `None` for types whose size is not statically known.
|
||||
pub fn byte_size(&self) -> Option<u64> {
|
||||
match self {
|
||||
TypeSpec::Char(expr) | TypeSpec::VarChar(expr) => {
|
||||
if let Expression::Literal(Literal::Integer(n)) = expr.as_ref() {
|
||||
Some(*n as u64)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
TypeSpec::Int(expr) | TypeSpec::Uns(expr) => {
|
||||
if let Expression::Literal(Literal::Integer(n)) = expr.as_ref() {
|
||||
Some(match n {
|
||||
3 => 1,
|
||||
5 => 2,
|
||||
10 => 4,
|
||||
20 => 8,
|
||||
_ => 8, // default to 8 bytes
|
||||
})
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
TypeSpec::Float(expr) => {
|
||||
if let Expression::Literal(Literal::Integer(n)) = expr.as_ref() {
|
||||
Some(if *n <= 4 { 4 } else { 8 })
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
TypeSpec::Ind => Some(1),
|
||||
TypeSpec::Pointer => Some(8),
|
||||
TypeSpec::Packed(digits, _) => {
|
||||
if let Expression::Literal(Literal::Integer(n)) = digits.as_ref() {
|
||||
Some((*n as u64 / 2) + 1)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// Variable / declaration keywords
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum VarKeyword {
|
||||
/// `INZ` — default initialisation.
|
||||
Inz,
|
||||
/// `INZ(expr)` — explicit initialisation value.
|
||||
InzExpr(Expression),
|
||||
/// `INZ(*named-constant)` — initialise to named constant.
|
||||
InzNamed(NamedConstant),
|
||||
Static,
|
||||
Other(String),
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// Procedures
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Procedure {
|
||||
pub name: String,
|
||||
pub exported: bool,
|
||||
pub pi: Option<PiSpec>,
|
||||
/// Local declarations (DCL-S, DCL-C, etc.) inside the procedure.
|
||||
pub locals: Vec<Declaration>,
|
||||
pub body: Vec<Statement>,
|
||||
}
|
||||
|
||||
/// Procedure Interface specification (`DCL-PI … END-PI`).
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct PiSpec {
|
||||
pub name: String,
|
||||
pub return_ty: Option<TypeSpec>,
|
||||
pub params: Vec<PiParam>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct PiParam {
|
||||
pub name: String,
|
||||
pub ty: TypeSpec,
|
||||
pub keywords: Vec<ParamKeyword>,
|
||||
}
|
||||
|
||||
/// A keyword attached to a procedure-interface parameter.
///
/// Derives `PartialEq`/`Eq` (like the expression-side AST types in this
/// module) so callers can test for a keyword with `contains`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ParamKeyword {
    /// `VALUE`.
    Value,
    /// `CONST`.
    Const,
    /// Any other keyword, kept as text.
    Other(String),
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// Statements
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum Statement {
|
||||
/// `lvalue = expr;` or `EVAL lvalue = expr;`
|
||||
Assign(AssignStmt),
|
||||
/// `IF expr; … [ELSEIF …] [ELSE …] ENDIF;`
|
||||
If(IfStmt),
|
||||
/// `DOW expr; … ENDDO;`
|
||||
DoWhile(DoWhileStmt),
|
||||
/// `DOU expr; … ENDDO;`
|
||||
DoUntil(DoUntilStmt),
|
||||
/// `FOR i = start TO/DOWNTO end [BY step]; … ENDFOR;`
|
||||
For(ForStmt),
|
||||
/// `SELECT; WHEN … [OTHER …] ENDSL;`
|
||||
Select(SelectStmt),
|
||||
/// `MONITOR; … ON-ERROR … ENDMON;`
|
||||
Monitor(MonitorStmt),
|
||||
/// `CALLP name(args);` or bare procedure call `name(args);`
|
||||
CallP(CallPStmt),
|
||||
/// `RETURN [expr];`
|
||||
Return(ReturnStmt),
|
||||
/// `LEAVE;`
|
||||
Leave,
|
||||
/// `ITER;`
|
||||
Iter,
|
||||
/// `LEAVESR;`
|
||||
LeaveSr,
|
||||
/// `EXSR name;`
|
||||
ExSr(String),
|
||||
/// `DSPLY expr;`
|
||||
Dsply(DsplyStmt),
|
||||
/// `RESET lvalue;` / `RESET *ALL;`
|
||||
Reset(ResetStmt),
|
||||
/// `CLEAR lvalue;`
|
||||
Clear(LValue),
|
||||
/// Any I/O statement (READ, WRITE, CHAIN, etc.) — kept as opaque for now.
|
||||
Io(IoStatement),
|
||||
/// Catch-all for statements not yet lowered.
|
||||
Unimplemented(String),
|
||||
}
|
||||
|
||||
// ── Assignment ────────────────────────────────────────────────────────────────
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct AssignStmt {
|
||||
pub target: LValue,
|
||||
pub value: Expression,
|
||||
}
|
||||
|
||||
// ── If / ElseIf / Else ────────────────────────────────────────────────────────
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct IfStmt {
|
||||
pub condition: Expression,
|
||||
pub then_body: Vec<Statement>,
|
||||
pub elseifs: Vec<ElseIf>,
|
||||
pub else_body: Option<Vec<Statement>>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct ElseIf {
|
||||
pub condition: Expression,
|
||||
pub body: Vec<Statement>,
|
||||
}
|
||||
|
||||
// ── DOW loop ──────────────────────────────────────────────────────────────────
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct DoWhileStmt {
|
||||
pub condition: Expression,
|
||||
pub body: Vec<Statement>,
|
||||
}
|
||||
|
||||
// ── DOU loop ──────────────────────────────────────────────────────────────────
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct DoUntilStmt {
|
||||
pub condition: Expression,
|
||||
pub body: Vec<Statement>,
|
||||
}
|
||||
|
||||
// ── FOR loop ──────────────────────────────────────────────────────────────────
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct ForStmt {
|
||||
pub var: String,
|
||||
pub start: Expression,
|
||||
pub limit: Expression,
|
||||
pub step: Option<Expression>,
|
||||
pub downto: bool,
|
||||
pub body: Vec<Statement>,
|
||||
}
|
||||
|
||||
// ── SELECT / WHEN ─────────────────────────────────────────────────────────────
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct SelectStmt {
|
||||
pub whens: Vec<WhenClause>,
|
||||
pub other: Option<Vec<Statement>>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct WhenClause {
|
||||
pub condition: Expression,
|
||||
pub body: Vec<Statement>,
|
||||
}
|
||||
|
||||
// ── MONITOR ───────────────────────────────────────────────────────────────────
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct MonitorStmt {
|
||||
pub body: Vec<Statement>,
|
||||
pub handlers: Vec<OnError>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct OnError {
|
||||
pub codes: Vec<ErrorCode>,
|
||||
pub body: Vec<Statement>,
|
||||
}
|
||||
|
||||
/// An error code listed on an `ON-ERROR` handler ([`OnError`]).
///
/// Every variant is plain data, so the type is `Copy`; it also derives
/// `PartialEq`/`Eq` (like the expression-side AST types in this module) so
/// handlers can be matched against a code by comparison.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ErrorCode {
    /// A numeric status code.
    Integer(u32),
    /// Program-error class (presumably `*PROGRAM` — confirm against the grammar).
    Program,
    /// File-error class (presumably `*FILE` — confirm against the grammar).
    File,
    /// All errors (presumably `*ALL` — confirm against the grammar).
    All,
}
|
||||
|
||||
// ── CALLP ─────────────────────────────────────────────────────────────────────
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct CallPStmt {
|
||||
pub name: String,
|
||||
pub args: Vec<Arg>,
|
||||
}
|
||||
|
||||
// ── RETURN ────────────────────────────────────────────────────────────────────
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct ReturnStmt {
|
||||
pub value: Option<Expression>,
|
||||
}
|
||||
|
||||
// ── DSPLY ─────────────────────────────────────────────────────────────────────
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct DsplyStmt {
|
||||
/// The expression to display.
|
||||
pub expr: Expression,
|
||||
/// Optional message queue identifier (two-operand form).
|
||||
pub msg_q: Option<String>,
|
||||
pub response: Option<String>,
|
||||
}
|
||||
|
||||
// ── RESET ─────────────────────────────────────────────────────────────────────
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum ResetStmt {
|
||||
Target(LValue),
|
||||
All,
|
||||
}
|
||||
|
||||
// ── I/O (opaque) ──────────────────────────────────────────────────────────────
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum IoStatement {
|
||||
Read { file: String },
|
||||
ReadP { file: String },
|
||||
Write { record: String },
|
||||
Update { record: String },
|
||||
Delete { key: Expression, file: String },
|
||||
Chain { key: Expression, file: String },
|
||||
SetLL { key: SetKey, file: String },
|
||||
SetGT { key: SetKey, file: String },
|
||||
Open { file: String },
|
||||
Close { file: Option<String> }, // None = *ALL
|
||||
Except { format: Option<String> },
|
||||
ExFmt { format: String },
|
||||
Post { file: String },
|
||||
Feod { file: String },
|
||||
Unlock { file: String },
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum SetKey {
|
||||
Expr(Expression),
|
||||
Start,
|
||||
End,
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// L-values
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/// An assignable location.
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub enum LValue {
|
||||
/// Simple or dotted name: `myVar` or `ds.field`.
|
||||
Name(QualifiedName),
|
||||
/// Array element: `arr(i)`.
|
||||
Index(QualifiedName, Vec<Expression>),
|
||||
}
|
||||
|
||||
impl LValue {
|
||||
/// Return the base name (first component of the qualified name).
|
||||
pub fn base_name(&self) -> &str {
|
||||
match self {
|
||||
LValue::Name(q) | LValue::Index(q, _) => &q.parts[0],
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// Expressions
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub enum Expression {
|
||||
Literal(Literal),
|
||||
Named(NamedConstant),
|
||||
Special(SpecialValue),
|
||||
Variable(QualifiedName),
|
||||
/// Array / function-style subscript: `name(idx)`.
|
||||
Index(QualifiedName, Vec<Expression>),
|
||||
/// Procedure / built-in call as expression: `name(args)`.
|
||||
Call(String, Vec<Arg>),
|
||||
BuiltIn(BuiltIn),
|
||||
UnaryMinus(Box<Expression>),
|
||||
UnaryPlus(Box<Expression>),
|
||||
BinOp(BinOp, Box<Expression>, Box<Expression>),
|
||||
Not(Box<Expression>),
|
||||
Paren(Box<Expression>),
|
||||
}
|
||||
|
||||
/// Binary operator of an `Expression::BinOp` node.
///
/// The enum carries no data, so it is `Copy` (operators can be passed by
/// value without `.clone()`) and `Hash` (usable as a map/set key).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum BinOp {
    // Arithmetic.
    Add, Sub, Mul, Div, Pow,
    // Comparison.
    Eq, Ne, Lt, Le, Gt, Ge,
    // Logical.
    And, Or,
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// Literals
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/// A literal value appearing in the source.
#[derive(Debug, Clone, PartialEq)]
pub enum Literal {
    /// Character string literal.
    String(String),
    /// Integer literal.
    Integer(i64),
    /// Floating-point literal.
    Float(f64),
    /// Hexadecimal literal, stored as bytes.
    Hex(Vec<u8>),
    /// `*ON` / `*OFF` used as a literal.
    Indicator(bool),
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// Named constants (`*ON`, `*OFF`, `*BLANK`, …)
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/// A named constant (`*ON`, `*OFF`, `*BLANK`, …).
///
/// The enum carries no data, so it is `Copy` (values can be passed around
/// without `.clone()`) and `Hash` (usable as a map/set key).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum NamedConstant {
    /// `*ON`.
    On,
    /// `*OFF`.
    Off,
    /// `*BLANK`.
    Blank,
    /// `*BLANKS`.
    Blanks,
    /// `*ZERO`.
    Zero,
    /// `*ZEROS`.
    Zeros,
    /// `*HIVAL`.
    HiVal,
    /// `*LOVAL`.
    LoVal,
    /// `*NULL`.
    Null,
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// Special values (`*IN`, `*START`, …)
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub enum SpecialValue {
|
||||
/// `*IN(n)` — indicator by number.
|
||||
In(Box<Expression>),
|
||||
InAll,
|
||||
On,
|
||||
Off,
|
||||
Blank,
|
||||
Blanks,
|
||||
Zero,
|
||||
Zeros,
|
||||
HiVal,
|
||||
LoVal,
|
||||
Null,
|
||||
/// `*ALL'string'`
|
||||
All(String),
|
||||
Omit,
|
||||
This,
|
||||
Same,
|
||||
Start,
|
||||
End,
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// Built-in functions
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/// The RPG IV `%BUILTIN(…)` functions we actually lower to code.
|
||||
/// All others are wrapped in `Other`.
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub enum BuiltIn {
|
||||
/// `%LEN(identifier)` — byte length of a field.
|
||||
Len(Box<Expression>),
|
||||
/// `%TRIM(expr)` — trim leading and trailing blanks.
|
||||
Trim(Box<Expression>),
|
||||
/// `%TRIML(expr)` — trim leading blanks.
|
||||
TrimL(Box<Expression>),
|
||||
/// `%TRIMR(expr)` — trim trailing blanks.
|
||||
TrimR(Box<Expression>),
|
||||
/// `%CHAR(expr)` — convert to character string.
|
||||
Char(Box<Expression>),
|
||||
/// `%INT(expr)` — convert to integer.
|
||||
Int(Box<Expression>),
|
||||
/// `%DEC(expr:digits:decimals)` — convert to packed decimal.
|
||||
Dec(Box<Expression>, Box<Expression>, Box<Expression>),
|
||||
/// `%ABS(expr)` — absolute value.
|
||||
Abs(Box<Expression>),
|
||||
/// `%SQRT(expr)` — square root.
|
||||
Sqrt(Box<Expression>),
|
||||
/// `%EOF[(file)]`
|
||||
Eof(Option<String>),
|
||||
/// `%FOUND[(file)]`
|
||||
Found(Option<String>),
|
||||
/// `%ERROR()`
|
||||
Error,
|
||||
/// `%SUBST(str:start:len)` or `%SUBST(str:start)`.
|
||||
Subst(Box<Expression>, Box<Expression>, Option<Box<Expression>>),
|
||||
/// `%SCAN(pattern:source[:start])`.
|
||||
Scan(Box<Expression>, Box<Expression>, Option<Box<Expression>>),
|
||||
/// `%SIZE(identifier)`.
|
||||
Size(Box<Expression>),
|
||||
/// `%ADDR(identifier)`.
|
||||
Addr(Box<Expression>),
|
||||
/// `%ALLOC(size)`.
|
||||
Alloc(Box<Expression>),
|
||||
/// `%REM(a:b)`.
|
||||
Rem(Box<Expression>, Box<Expression>),
|
||||
/// `%DIV(a:b)`.
|
||||
Div(Box<Expression>, Box<Expression>),
|
||||
/// Any built-in we haven't individually modelled.
|
||||
Other(String, Vec<Expression>),
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// Qualified names and argument lists
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/// A dot-separated name: `ds.subDs.leaf`.
///
/// Derives `Eq` and `Hash` in addition to `PartialEq` so that names can be
/// used as keys in hash maps and sets.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct QualifiedName {
    /// The components of the name, in source order (`["ds", "subDs", "leaf"]`).
    pub parts: Vec<String>,
}

impl QualifiedName {
    /// Build a name consisting of the single component `name`.
    pub fn simple(name: impl Into<String>) -> Self {
        QualifiedName { parts: vec![name.into()] }
    }

    /// Return `true` when the name has exactly one component (no dots).
    pub fn is_simple(&self) -> bool {
        self.parts.len() == 1
    }

    /// Return the leaf (last) component, or `""` when the name has no
    /// components at all.
    pub fn leaf(&self) -> &str {
        self.parts.last().map_or("", String::as_str)
    }
}

impl std::fmt::Display for QualifiedName {
    /// Write the components separated by `.`; an empty name writes nothing.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Stream the parts straight into the formatter instead of building a
        // temporary joined `String` first.
        let mut parts = self.parts.iter();
        if let Some(first) = parts.next() {
            f.write_str(first)?;
            for part in parts {
                f.write_str(".")?;
                f.write_str(part)?;
            }
        }
        Ok(())
    }
}
|
||||
|
||||
/// A call argument.
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub enum Arg {
|
||||
Expr(Expression),
|
||||
Omit,
|
||||
}
|
||||
1589
src/codegen.rs
Normal file
1589
src/codegen.rs
Normal file
File diff suppressed because it is too large
Load Diff
@@ -2,6 +2,13 @@
|
||||
//!
|
||||
//! Loads the BNF grammar embedded at compile time, builds a [`bnf::GrammarParser`],
|
||||
//! and exposes helpers used by both the compiler binary and the demo binary.
|
||||
//!
|
||||
//! Also provides the typed AST ([`ast`]), BNF-to-AST lowering pass ([`lower`]),
|
||||
//! and LLVM code-generator ([`codegen`]) used by the compiler pipeline.
|
||||
|
||||
pub mod ast;
|
||||
pub mod lower;
|
||||
pub mod codegen;
|
||||
|
||||
use bnf::{Grammar, Term};
|
||||
|
||||
|
||||
2758
src/lower.rs
Normal file
2758
src/lower.rs
Normal file
File diff suppressed because it is too large
Load Diff
429
src/main.rs
429
src/main.rs
@@ -1,7 +1,11 @@
|
||||
//! rust-langrpg — RPG IV compiler CLI
|
||||
//!
|
||||
//! Parses one or more RPG IV source files using the embedded BNF grammar
|
||||
//! and optionally writes the resulting parse tree to an output file.
|
||||
//! Full compilation pipeline:
|
||||
//! source (.rpg)
|
||||
//! → BNF validation (bnf crate)
|
||||
//! → AST lowering (lower.rs)
|
||||
//! → LLVM IR / object (codegen.rs via inkwell)
|
||||
//! → native executable (cc linker + librpgrt.so runtime)
|
||||
//!
|
||||
//! ## Usage
|
||||
//!
|
||||
@@ -9,46 +13,80 @@
|
||||
//! rust-langrpg [OPTIONS] <SOURCES>...
|
||||
//!
|
||||
//! Arguments:
|
||||
//! <SOURCES>... RPG IV source file(s) to parse
|
||||
//! <SOURCES>... RPG IV source file(s) to compile
|
||||
//!
|
||||
//! Options:
|
||||
//! -o <OUTPUT> Write the parse tree to this file
|
||||
//! -h, --help Print help
|
||||
//! -V, --version Print version
|
||||
//! -o <OUTPUT> Output executable path [default: a.out]
|
||||
//! --emit-ir Print LLVM IR to stdout instead of producing a binary
|
||||
//! --emit-tree Print BNF parse tree to stdout instead of compiling
|
||||
//! -O <LEVEL> Optimisation level 0-3 [default: 0]
|
||||
//! --no-link Produce a .o object file, skip linking
|
||||
//! --runtime <PATH> Path to librpgrt.so [default: auto-detect]
|
||||
//! -h, --help Print help
|
||||
//! -V, --version Print version
|
||||
//! ```
|
||||
//!
|
||||
//! ## Example
|
||||
//!
|
||||
//! ```text
|
||||
//! cargo run --release -- -o out.txt hello.rpg
|
||||
//! cargo run --release -- -o main hello.rpg
|
||||
//! ./main
|
||||
//! DSPLY Hello, World!
|
||||
//! ```
|
||||
|
||||
use std::{
|
||||
fs,
|
||||
io::{self, Write},
|
||||
|
||||
path::PathBuf,
|
||||
process,
|
||||
};
|
||||
|
||||
use clap::Parser;
|
||||
use rust_langrpg::{load_grammar, parse_as};
|
||||
use clap::Parser as ClapParser;
|
||||
use rust_langrpg::{codegen, load_grammar, lower::lower, parse_as};
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// CLI definition
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/// RPG IV free-format compiler — parses source files and emits parse trees.
|
||||
#[derive(Parser, Debug)]
|
||||
#[command(name = "rust-langrpg", version, about, long_about = None)]
|
||||
/// RPG IV free-format compiler — produces native Linux executables from RPG IV
|
||||
/// source files using LLVM as the back-end.
|
||||
#[derive(ClapParser, Debug)]
|
||||
#[command(
|
||||
name = "rust-langrpg",
|
||||
version,
|
||||
about = "RPG IV compiler (LLVM back-end)",
|
||||
long_about = None,
|
||||
)]
|
||||
struct Cli {
|
||||
/// RPG IV source file(s) to parse.
|
||||
/// RPG IV source file(s) to compile.
|
||||
#[arg(required = true, value_name = "SOURCES")]
|
||||
sources: Vec<PathBuf>,
|
||||
|
||||
/// Write the parse tree(s) to this file.
|
||||
/// If omitted the tree is not printed.
|
||||
/// Write the output executable (or object with --no-link) to this path.
|
||||
/// If omitted the binary is written to `a.out`.
|
||||
#[arg(short = 'o', value_name = "OUTPUT")]
|
||||
output: Option<PathBuf>,
|
||||
|
||||
/// Emit LLVM IR text to stdout instead of compiling to a binary.
|
||||
#[arg(long = "emit-ir")]
|
||||
emit_ir: bool,
|
||||
|
||||
/// Emit the BNF parse tree to stdout instead of compiling.
|
||||
#[arg(long = "emit-tree")]
|
||||
emit_tree: bool,
|
||||
|
||||
/// Optimisation level: 0 = none, 1 = less, 2 = default, 3 = aggressive.
|
||||
#[arg(short = 'O', default_value = "0", value_name = "LEVEL")]
|
||||
opt_level: u8,
|
||||
|
||||
/// Produce a `.o` object file but do not invoke the linker.
|
||||
#[arg(long = "no-link")]
|
||||
no_link: bool,
|
||||
|
||||
/// Path to the `librpgrt.so` runtime shared library.
|
||||
/// If not specified the compiler searches in common locations.
|
||||
#[arg(long = "runtime", value_name = "PATH")]
|
||||
runtime: Option<PathBuf>,
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
@@ -58,7 +96,7 @@ struct Cli {
|
||||
fn main() {
|
||||
let cli = Cli::parse();
|
||||
|
||||
// ── Load grammar ─────────────────────────────────────────────────────────
|
||||
// ── Load and build the BNF grammar ───────────────────────────────────────
|
||||
let grammar = match load_grammar() {
|
||||
Ok(g) => g,
|
||||
Err(e) => {
|
||||
@@ -67,61 +105,139 @@ fn main() {
|
||||
}
|
||||
};
|
||||
|
||||
// ── Build parser ─────────────────────────────────────────────────────────
|
||||
let parser = match grammar.build_parser() {
|
||||
let bnf_parser = match grammar.build_parser() {
|
||||
Ok(p) => p,
|
||||
Err(e) => {
|
||||
eprintln!("error: failed to build parser: {e}");
|
||||
eprintln!("error: failed to build BNF parser: {e}");
|
||||
process::exit(1);
|
||||
}
|
||||
};
|
||||
|
||||
// ── Open output sink ──────────────────────────────────────────────────────
|
||||
// `output` is Box<dyn Write> so we can use either a file or a sink that
|
||||
// discards everything when -o was not supplied.
|
||||
let mut output: Box<dyn Write> = match &cli.output {
|
||||
Some(path) => {
|
||||
let file = fs::File::create(path).unwrap_or_else(|e| {
|
||||
eprintln!("error: cannot open output file '{}': {e}", path.display());
|
||||
process::exit(1);
|
||||
});
|
||||
Box::new(io::BufWriter::new(file))
|
||||
}
|
||||
None => Box::new(io::sink()),
|
||||
};
|
||||
|
||||
// ── Process each source file ──────────────────────────────────────────────
|
||||
// ── Process each source file ─────────────────────────────────────────────
|
||||
let mut any_error = false;
|
||||
|
||||
for path in &cli.sources {
|
||||
let source = match fs::read_to_string(path) {
|
||||
for source_path in &cli.sources {
|
||||
let source_text = match fs::read_to_string(source_path) {
|
||||
Ok(s) => s,
|
||||
Err(e) => {
|
||||
eprintln!("error: cannot read '{}': {e}", path.display());
|
||||
eprintln!("error: cannot read '{}': {e}", source_path.display());
|
||||
any_error = true;
|
||||
continue;
|
||||
}
|
||||
};
|
||||
|
||||
// Try the top-level "program" rule first; fall back to "source-file"
|
||||
// so the binary is useful even if only one of those rule names exists
|
||||
// in the grammar.
|
||||
let tree = parse_as(&parser, source.trim(), "program")
|
||||
.or_else(|| parse_as(&parser, source.trim(), "source-file"));
|
||||
// ── BNF validation ────────────────────────────────────────────────────
|
||||
let tree_opt = parse_as(&bnf_parser, source_text.trim(), "program")
|
||||
.or_else(|| parse_as(&bnf_parser, source_text.trim(), "source-file"));
|
||||
|
||||
match tree {
|
||||
Some(t) => {
|
||||
eprintln!("ok: {}", path.display());
|
||||
writeln!(output, "=== {} ===", path.display())
|
||||
.and_then(|_| writeln!(output, "{t}"))
|
||||
.unwrap_or_else(|e| {
|
||||
eprintln!("error: write failed: {e}");
|
||||
any_error = true;
|
||||
});
|
||||
}
|
||||
None => {
|
||||
eprintln!("error: '{}' did not match the RPG IV grammar", path.display());
|
||||
if tree_opt.is_none() {
|
||||
eprintln!(
|
||||
"error: '{}' did not match the RPG IV grammar",
|
||||
source_path.display()
|
||||
);
|
||||
any_error = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
// ── --emit-tree: print parse tree and stop ────────────────────────────
|
||||
if cli.emit_tree {
|
||||
println!("=== {} ===", source_path.display());
|
||||
println!("{}", tree_opt.unwrap());
|
||||
eprintln!("ok: {} (parse tree emitted)", source_path.display());
|
||||
continue;
|
||||
}
|
||||
|
||||
eprintln!("ok: {} (BNF valid)", source_path.display());
|
||||
|
||||
// ── Lower to typed AST ────────────────────────────────────────────────
|
||||
let program = match lower(source_text.trim()) {
|
||||
Ok(p) => p,
|
||||
Err(e) => {
|
||||
eprintln!("error: lowering '{}' failed: {e}", source_path.display());
|
||||
any_error = true;
|
||||
continue;
|
||||
}
|
||||
};
|
||||
|
||||
eprintln!(
|
||||
"ok: {} ({} declaration(s), {} procedure(s))",
|
||||
source_path.display(),
|
||||
program.declarations.len(),
|
||||
program.procedures.len(),
|
||||
);
|
||||
|
||||
// ── --emit-ir: print LLVM IR and stop ────────────────────────────────
|
||||
if cli.emit_ir {
|
||||
match codegen::emit_ir(&program) {
|
||||
Ok(ir) => {
|
||||
print!("{}", ir);
|
||||
}
|
||||
Err(e) => {
|
||||
eprintln!("error: IR emission failed for '{}': {e}", source_path.display());
|
||||
any_error = true;
|
||||
}
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
// ── Determine output path ─────────────────────────────────────────────
|
||||
let out_path = if cli.no_link {
|
||||
// Object file: replace source extension with .o
|
||||
let mut p = cli.output.clone().unwrap_or_else(|| {
|
||||
let mut base = source_path.clone();
|
||||
base.set_extension("o");
|
||||
base
|
||||
});
|
||||
if p.extension().and_then(|e| e.to_str()) != Some("o") {
|
||||
p.set_extension("o");
|
||||
}
|
||||
p
|
||||
} else {
|
||||
// Executable: use -o, or default to a.out
|
||||
cli.output.clone().unwrap_or_else(|| PathBuf::from("a.out"))
|
||||
};
|
||||
|
||||
// ── Compile to object file ────────────────────────────────────────────
|
||||
let obj_path: PathBuf = if cli.no_link {
|
||||
out_path.clone()
|
||||
} else {
|
||||
// Temporary object file alongside the final binary.
|
||||
let stem = source_path
|
||||
.file_stem()
|
||||
.and_then(|s| s.to_str())
|
||||
.unwrap_or("rpg_prog");
|
||||
let mut tmp = std::env::temp_dir();
|
||||
tmp.push(format!("{}.rpg.o", stem));
|
||||
tmp
|
||||
};
|
||||
|
||||
match codegen::compile_to_object(&program, &obj_path, cli.opt_level) {
|
||||
Ok(()) => {
|
||||
eprintln!("ok: object → {}", obj_path.display());
|
||||
}
|
||||
Err(e) => {
|
||||
eprintln!(
|
||||
"error: codegen failed for '{}': {e}",
|
||||
source_path.display()
|
||||
);
|
||||
any_error = true;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
// ── Link if requested ─────────────────────────────────────────────────
|
||||
if !cli.no_link {
|
||||
let runtime = find_runtime(cli.runtime.as_deref());
|
||||
match link_executable(&obj_path, &out_path, runtime.as_deref()) {
|
||||
Ok(()) => {
|
||||
eprintln!("ok: executable → {}", out_path.display());
|
||||
// Clean up the temporary object.
|
||||
let _ = fs::remove_file(&obj_path);
|
||||
}
|
||||
Err(msg) => {
|
||||
eprintln!("error: linking failed: {msg}");
|
||||
any_error = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -130,3 +246,206 @@ fn main() {
|
||||
process::exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// Linker invocation
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/// Invoke the system C compiler driver to link `obj_path` into `exe_path`.
///
/// We use `cc` (which wraps the system linker) rather than calling `ld`
/// directly so that the C runtime startup files (`crt0.o`, `crti.o`, etc.) are
/// included automatically — this is the same approach Clang uses when building
/// executables.
///
/// `runtime` selects how the RPG runtime library is linked; the exact flags
/// are produced by `runtime_link_args` and appended after `-o <exe_path>`.
///
/// # Errors
///
/// Returns a human-readable message when `cc` cannot be spawned, or when it
/// runs but exits unsuccessfully.
fn link_executable(
    obj_path: &std::path::Path,
    exe_path: &std::path::Path,
    runtime: Option<&std::path::Path>,
) -> Result<(), String> {
    let status = process::Command::new("cc")
        .arg(obj_path)
        .arg("-o")
        .arg(exe_path)
        .args(runtime_link_args(runtime))
        .status()
        .map_err(|e| format!("could not run linker `cc`: {e}"))?;

    if status.success() {
        Ok(())
    } else {
        // `ExitStatus` already renders as "exit status: N" / "signal: N", so
        // it is embedded as-is instead of being prefixed with "status" again.
        Err(format!("`cc` exited unsuccessfully ({status})"))
    }
}

/// Build the `cc` arguments that link the RPG runtime and record where to
/// find it at execution time.
///
/// * `None` — only `-lc` is emitted; the produced program then relies on the
///   dynamic loader's normal search path to locate the runtime.
/// * `Some(file)` (an existing regular file) — the library is passed to the
///   linker directly, and the directory containing it becomes the rpath.
/// * `Some(other)` — the path is treated as a library directory:
///   `-L<other> -lrpgrt`, and that same directory becomes the rpath.
///
/// Arguments are assembled as `OsString`s so that paths which are not valid
/// Unicode reach the linker unmodified.
fn runtime_link_args(runtime: Option<&std::path::Path>) -> Vec<std::ffi::OsString> {
    let rt = match runtime {
        Some(rt) => rt,
        None => return vec![std::ffi::OsString::from("-lc")],
    };

    let mut args = Vec::with_capacity(3);

    // The rpath must name the directory that actually holds the library: the
    // parent for a file, but the path itself when it is a library directory.
    let rpath_dir = if rt.is_file() {
        args.push(rt.as_os_str().to_os_string());
        rt.parent()
    } else {
        let mut search_dir = std::ffi::OsString::from("-L");
        search_dir.push(rt);
        args.push(search_dir);
        args.push(std::ffi::OsString::from("-lrpgrt"));
        Some(rt)
    };

    // A bare file name such as `librpgrt.so` has an empty parent; skip the
    // rpath then rather than handing the linker a dangling `-Wl,-rpath,`.
    if let Some(dir) = rpath_dir.filter(|d| !d.as_os_str().is_empty()) {
        let mut rpath = std::ffi::OsString::from("-Wl,-rpath,");
        rpath.push(dir);
        args.push(rpath);
    }

    args
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// Runtime library discovery
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/// Locate the RPG runtime library (`librpgrt.so`).
///
/// Checked in order; the first hit wins:
/// 0. `explicit` — returned as-is, without an existence check
/// 1. `RPGRT_LIB` environment variable (used only if that path exists)
/// 2. Same directory as the compiler executable
/// 3. `target/debug/`, `target/release/` and `target/debug/deps/` relative to
///    the current directory (useful when running via `cargo run`)
/// 4. `/usr/local/lib`
/// 5. `/usr/lib`
///
/// Returns `None` when no candidate exists. Paths from step 3 are returned
/// relative, exactly as probed.
fn find_runtime(explicit: Option<&std::path::Path>) -> Option<PathBuf> {
    // Honour an explicitly supplied path first.
    if let Some(p) = explicit {
        return Some(p.to_path_buf());
    }

    // `var_os` rather than `var`: a path that is not valid Unicode is still a
    // usable path and must not be silently discarded.
    if let Some(from_env) = std::env::var_os("RPGRT_LIB").map(PathBuf::from) {
        if from_env.exists() {
            return Some(from_env);
        }
    }

    // Alongside the running binary (absent if the executable path is unknown).
    let beside_compiler = std::env::current_exe()
        .ok()
        .and_then(|exe| exe.parent().map(|dir| dir.join("librpgrt.so")));

    // Cargo target directories, then system-wide locations.
    let fixed_locations = [
        "target/debug/librpgrt.so",
        "target/release/librpgrt.so",
        "target/debug/deps/librpgrt.so",
        "/usr/local/lib/librpgrt.so",
        "/usr/lib/librpgrt.so",
    ];

    beside_compiler
        .into_iter()
        .chain(fixed_locations.iter().copied().map(PathBuf::from))
        .find(|candidate| candidate.exists())
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// Integration smoke test (compile-time only — no process spawning needed)
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
#[cfg(test)]
mod tests {
    use rust_langrpg::{codegen::emit_ir, lower::lower};

    /// First `max_chars` characters of `ir`, for use in assertion messages.
    ///
    /// Counts characters instead of slicing by byte offset: a byte-offset
    /// slice panics when the cut lands inside a multi-byte character, which
    /// would replace the real assertion failure with a slicing panic.
    fn ir_preview(ir: &str, max_chars: usize) -> String {
        ir.chars().take(max_chars).collect()
    }

    /// The hello.rpg from the repository root must compile all the way through
    /// to LLVM IR without errors.
    #[test]
    fn hello_rpg_emits_ir() {
        let src = include_str!("../hello.rpg");
        let prog = lower(src.trim()).expect("lower hello.rpg");
        let ir = emit_ir(&prog).expect("emit_ir hello.rpg");

        // The IR must define at least one function.
        assert!(
            ir.contains("define"),
            "IR should contain at least one function definition:\n{}",
            ir_preview(&ir, 1000)
        );

        // The IR must reference the dsply runtime call.
        assert!(
            ir.contains("rpg_dsply"),
            "IR should reference rpg_dsply:\n{}",
            ir_preview(&ir, 1000)
        );

        // There must be a C main() wrapper so the binary is directly executable.
        assert!(
            ir.contains("@main"),
            "IR should contain a @main entry point:\n{}",
            ir_preview(&ir, 1000)
        );
    }

    /// A minimal RPG IV program with an integer variable and a loop must
    /// compile to IR that contains branch instructions (i.e. the loop was
    /// actually code-generated, not silently dropped).
    #[test]
    fn loop_program_emits_branches() {
        let src = r#"
CTL-OPT DFTACTGRP(*NO);

DCL-S counter INT(10) INZ(0);

DCL-PROC main EXPORT;
DCL-S i INT(10);
FOR i = 1 TO 10;
counter = counter + i;
ENDFOR;
RETURN;
END-PROC;
"#;
        let prog = lower(src.trim()).expect("lower loop program");
        let ir = emit_ir(&prog).expect("emit_ir loop program");
        assert!(
            ir.contains("br "),
            "loop IR should contain branch instructions:\n{}",
            ir_preview(&ir, 2000)
        );
    }

    /// An IF/ELSE conditional must produce a conditional branch in the IR.
    #[test]
    fn conditional_program_emits_conditional_branch() {
        let src = r#"
DCL-PROC check EXPORT;
DCL-S x INT(10) INZ(5);
IF x = 5;
RETURN;
ELSE;
RETURN;
ENDIF;
END-PROC;
"#;
        let prog = lower(src.trim()).expect("lower conditional program");
        let ir = emit_ir(&prog).expect("emit_ir conditional program");
        assert!(
            ir.contains("br i1"),
            "conditional IR should contain 'br i1':\n{}",
            ir_preview(&ir, 2000)
        );
    }
}
|
||||
|
||||
Reference in New Issue
Block a user