//! lower.rs — Tokenizer and recursive-descent lowering pass. //! //! Converts validated RPG IV free-format source text into the typed [`Program`] //! AST defined in `ast.rs`. //! //! The BNF parser (`bnf` crate) is only used to *validate* the source; the //! lowering pass is a hand-written recursive descent parser that produces the //! richer typed tree needed by the LLVM code generator. //! //! ## Supported subset //! //! The pass fully lowers the constructs needed to compile `hello.rpg` and //! programs of similar complexity: //! //! * `CTL-OPT` control specs //! * `DCL-S` / `DCL-C` / `DCL-DS` / `DCL-F` declarations //! * `DCL-PROC … END-PROC` procedures with `DCL-PI … END-PI` //! * `BEG-SR … END-SR` subroutines //! * All expression forms (literals, arithmetic, logical, comparisons, BIFs) //! * `DSPLY`, `RETURN`, `LEAVE`, `ITER`, `LEAVESR`, `EXSR`, `CLEAR`, `RESET` //! * `IF/ELSEIF/ELSE/ENDIF`, `DOW/ENDDO`, `DOU/ENDDO`, `FOR/ENDFOR` //! * `SELECT/WHEN/OTHER/ENDSL`, `MONITOR/ON-ERROR/ENDMON` //! * `CALLP` and bare procedure calls //! * All I/O statements (kept as `Statement::Io`) //! * Assignment (`lvalue = expr` / `EVAL lvalue = expr`) //! //! Constructs outside this subset produce `Statement::Unimplemented` or //! `Declaration` placeholder variants rather than hard errors, so the //! compiler can still lower the parts it understands. use crate::ast::*; // ───────────────────────────────────────────────────────────────────────────── // Public entry point // ───────────────────────────────────────────────────────────────────────────── /// Lower `source` into a typed [`Program`]. /// /// Returns `Err` only if the tokenizer fails completely. Individual /// unrecognised constructs are silently kept as `Unimplemented` nodes. pub fn lower(source: &str) -> Result { let tokens = tokenize(source)?; let mut parser = Parser::new(tokens); let program = parser.parse_program()?; Ok(program) } /// Strip RPG IV compiler directives that start with `**` (e.g. 
`**FREE`, /// `**CTDATA`) by blanking out those lines before tokenization. fn strip_star_star_directives(source: &str) -> String { source .lines() .map(|line| { let trimmed = line.trim_start(); if trimmed.starts_with("**") { // Replace with an empty line so line numbers stay consistent. "" } else { line } }) .collect::>() .join("\n") } // ───────────────────────────────────────────────────────────────────────────── // Error type // ───────────────────────────────────────────────────────────────────────────── #[derive(Debug)] pub struct LowerError { pub message: String, /// 1-based source line where the error was detected, if known. pub line: Option, } impl std::fmt::Display for LowerError { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { if let Some(ln) = self.line { write!(f, "lower error (line {}): {}", ln, self.message) } else { write!(f, "lower error: {}", self.message) } } } impl std::error::Error for LowerError {} impl LowerError { fn new(msg: impl Into) -> Self { LowerError { message: msg.into(), line: None } } fn at(line: usize, msg: impl Into) -> Self { LowerError { message: msg.into(), line: Some(line) } } } // ───────────────────────────────────────────────────────────────────────────── // Tokens // ───────────────────────────────────────────────────────────────────────────── #[derive(Debug, Clone, PartialEq)] enum Token { // Keywords / compound keywords KwCtlOpt, KwDclS, KwDclC, KwDclDs, KwEndDs, KwDclF, KwDclProc, KwEndProc, KwDclPi, KwEndPi, KwBegSr, KwEndSr, // Type specifiers KwChar, KwVarChar, KwGraph, KwVarGraph, KwUcs2, KwVarUcs2, KwInt, KwUns, KwFloat, KwPacked, KwZoned, KwBindec, KwInd, KwDate, KwTime, KwTimestamp, KwPointer, KwProcPtr, KwLike, KwLikeDs, KwLikeRec, KwObject, // Variable keywords KwInz, KwConst, KwValue, KwStatic, KwBased, KwDim, KwAscend, KwDescend, KwAltSeq, KwOpDesc, KwNoOpt, KwVolatile, KwOptions, // Proc keywords KwExport, KwExtProc, KwDclCase, // Control option keywords KwNoMain, KwMain, KwDftActGrp, 
KwActGrp, KwOption, KwDatFmt, KwTimFmt, KwDecEdit, KwAlwNull, KwDebug, KwExpOpts, KwBndDir, KwCopyright, KwStgMdl, KwTruncNbr, KwText, // DS keywords KwQualified, KwTemplate, KwExtName, KwLikeRec2, // Statements KwEval, KwEvalR, KwEvalCorr, KwIf, KwElseIf, KwElse, KwEndIf, KwDow, KwDou, KwEndDo, KwFor, KwTo, KwDownTo, KwBy, KwEndFor, KwSelect, KwWhen, KwOther, KwEndSl, KwMonitor, KwOnError, KwEndMon, KwReturn, KwLeave, KwIter, KwLeaveSr, KwExSr, KwCallP, KwDsply, KwReset, KwClear, KwSortA, KwDump, KwForce, KwPost, KwFeod, KwUnlock, KwDeAlloc, // I/O statements KwRead, KwReadP, KwReadE, KwReadPE, KwWrite, KwUpdate, KwDelete, KwChain, KwSetLL, KwSetGT, KwOpen, KwClose, KwExcept, KwExFmt, KwIn, KwOut, KwUnlockFile, KwCommit, KwRollback, // Named constants / special values KwOn, // *ON KwOff, // *OFF KwBlank, // *BLANK KwBlanks, // *BLANKS KwZero, // *ZERO KwZeros, // *ZEROS KwHiVal, // *HIVAL KwLoVal, // *LOVAL KwNull, // *NULL KwYes, // *YES KwNo, // *NO KwAll, // *ALL KwStart, // *START KwEnd, // *END KwProgram, // *PROGRAM KwFile, // *FILE (in ON-ERROR) KwOmit, // *OMIT KwThis, // *THIS KwSame, // *SAME KwIn2, // *IN KwCaller, // *CALLER KwNew, // *NEW KwNoPass, // *NOPASS KwVarSize, // *VARSIZE KwUsrCtl, // *USRCTL KwInputOnly,// *INPUTONLY KwInputOpt, // *INPUT (usage) KwOutputOpt,// *OUTPUT (usage) KwUpdateOpt,// *UPDATE KwDeleteOpt,// *DELETE KwKey, // *KEY KwSrcStmt, // *SRCSTMT KwNodeBugIo,// *NODEBUGIO KwNoUnRef, // *NOUNREF KwNoShowCpy,// *NOSHOWCPY KwResDecPos,// *RESDECPOS KwSnglLvl, // *SNGLVL KwTeraSpace,// *TERASPACE KwInherit, // *INHERIT KwExtDft, // *EXTDFT KwCl, // *CL // Date/time formats KwMdy, KwDmy, KwYmd, KwJul, KwIso, KwUsa, KwEur, KwJis, KwHms, // Duration codes KwYears, KwMonths, KwDays, KwHours, KwMinutes, KwSeconds, KwMSeconds, // Logical operators (keyword form) KwAnd, KwOr, KwNot, // Built-in function names BifAbs, BifAddr, BifAlloc, BifBitAnd, BifBitNot, BifBitOr, BifBitXor, BifChar, BifCheck, BifCheckR, BifDate, BifDays, BifDec, 
BifDecH, BifDecPos, BifDiff, BifDiv, BifEditC, BifEditFlt, BifEditW, BifElem, BifEof, BifEqual, BifError, BifFields, BifFloat, BifFound, BifGraph, BifHours, BifInt, BifIntH, BifKds, BifLen, BifMinutes, BifMonths, BifMSeconds, BifNullInd, BifOccur, BifOpen, BifPAddr, BifParms, BifReAlloc, BifRem, BifReplace, BifScan, BifScanR, BifSeconds, BifShtDn, BifSize, BifSqrt, BifStatus, BifStr, BifSubArr, BifSubst, BifThis, BifTime, BifTimestamp, BifTrim, BifTrimL, BifTrimR, BifUcs2, BifUns, BifUnsH, BifXFoot, BifXLate, BifYears, // Special eval option tokens (single-char keyword args) LitH, // 'H' inside eval(...) LitT, // 'T' LitE, // 'E' LitN, // 'N' LitA, // 'A' LitD, // 'D' // Operators OpStar2, // ** OpStar, // * OpPlus, OpMinus, OpSlash, OpEq, OpNe, // <> OpLe, // <= OpGe, // >= OpLt, OpGt, // Punctuation LParen, RParen, Semicolon, Colon, Dot, // Primitives Identifier(String), StringLit(String), IntLit(i64), FloatLit(f64), HexLit(String), Eof, } // ───────────────────────────────────────────────────────────────────────────── // Tokenizer // ───────────────────────────────────────────────────────────────────────────── fn tokenize(source: &str) -> Result, LowerError> { // Strip **FREE / **CTDATA / any **word compiler directives first. let cleaned = strip_star_star_directives(source); let chars: Vec = cleaned.chars().collect(); let mut pos = 0; let mut tokens = Vec::new(); let mut line: usize = 1; while pos < chars.len() { // Track line numbers. if chars[pos] == '\n' { line += 1; pos += 1; continue; } // Skip other whitespace if chars[pos].is_whitespace() { pos += 1; continue; } // Line comments: // ... if pos + 1 < chars.len() && chars[pos] == '/' && chars[pos + 1] == '/' { while pos < chars.len() && chars[pos] != '\n' { pos += 1; } continue; } // Block comments: /* ... 
*/ if pos + 1 < chars.len() && chars[pos] == '/' && chars[pos + 1] == '*' { pos += 2; while pos + 1 < chars.len() { if chars[pos] == '*' && chars[pos + 1] == '/' { pos += 2; break; } pos += 1; } continue; } // String literal if chars[pos] == '\'' { pos += 1; let mut s = String::new(); while pos < chars.len() { if chars[pos] == '\'' { // '' means a literal single-quote inside the string if pos + 1 < chars.len() && chars[pos + 1] == '\'' { s.push('\''); pos += 2; } else { pos += 1; break; } } else { s.push(chars[pos]); pos += 1; } } tokens.push(Token::StringLit(s)); continue; } // Hex literal X'...' if chars[pos] == 'X' || chars[pos] == 'x' { if pos + 1 < chars.len() && chars[pos + 1] == '\'' { pos += 2; let mut h = String::new(); while pos < chars.len() && chars[pos] != '\'' { h.push(chars[pos]); pos += 1; } if pos < chars.len() { pos += 1; } tokens.push(Token::HexLit(h)); continue; } } // Numeric literal if chars[pos].is_ascii_digit() || (chars[pos] == '.' && pos + 1 < chars.len() && chars[pos + 1].is_ascii_digit()) { let start = pos; while pos < chars.len() && chars[pos].is_ascii_digit() { pos += 1; } if pos < chars.len() && chars[pos] == '.' { pos += 1; while pos < chars.len() && chars[pos].is_ascii_digit() { pos += 1; } let s: String = chars[start..pos].iter().collect(); let f: f64 = s.parse().unwrap_or(0.0); tokens.push(Token::FloatLit(f)); } else { let s: String = chars[start..pos].iter().collect(); let n: i64 = s.parse().unwrap_or(0); tokens.push(Token::IntLit(n)); } continue; } // Operators and punctuation match chars[pos] { '(' => { tokens.push(Token::LParen); pos += 1; continue; } ')' => { tokens.push(Token::RParen); pos += 1; continue; } ';' => { tokens.push(Token::Semicolon); pos += 1; continue; } ':' => { tokens.push(Token::Colon); pos += 1; continue; } '.' 
=> { tokens.push(Token::Dot); pos += 1; continue; } '+' => { tokens.push(Token::OpPlus); pos += 1; continue; } '-' => { tokens.push(Token::OpMinus); pos += 1; continue; } '/' => { tokens.push(Token::OpSlash); pos += 1; continue; } '=' => { tokens.push(Token::OpEq); pos += 1; continue; } '*' => { if pos + 1 < chars.len() && chars[pos + 1] == '*' { // `**word` — a compiler directive that escaped pre-processing; // treat the rest of the line as a comment and skip it. if pos + 2 < chars.len() && chars[pos + 2].is_alphabetic() { while pos < chars.len() && chars[pos] != '\n' { pos += 1; } continue; } tokens.push(Token::OpStar2); pos += 2; } else { // Could be start of *ON, *OFF, *BLANK etc. // peek ahead let next_pos = pos + 1; if next_pos < chars.len() && (chars[next_pos].is_alphabetic() || chars[next_pos] == '_') { // Read the *WORD pos += 1; // skip * let start = pos; while pos < chars.len() && (chars[pos].is_alphanumeric() || chars[pos] == '_') { pos += 1; } let word: String = chars[start..pos].iter().collect(); let upper = word.to_uppercase(); let tok = match upper.as_str() { "ON" => Token::KwOn, "OFF" => Token::KwOff, "BLANK" => Token::KwBlank, "BLANKS" => Token::KwBlanks, "ZERO" => Token::KwZero, "ZEROS" => Token::KwZeros, "HIVAL" => Token::KwHiVal, "LOVAL" => Token::KwLoVal, "NULL" => Token::KwNull, "YES" => Token::KwYes, "NO" => Token::KwNo, "ALL" => Token::KwAll, "START" => Token::KwStart, "END" => Token::KwEnd, "PROGRAM" => Token::KwProgram, "FILE" => Token::KwFile, "OMIT" => Token::KwOmit, "THIS" => Token::KwThis, "SAME" => Token::KwSame, "IN" => Token::KwIn2, "CALLER" => Token::KwCaller, "NEW" => Token::KwNew, "NOPASS" => Token::KwNoPass, "VARSIZE" => Token::KwVarSize, "USRCTL" => Token::KwUsrCtl, "INPUTONLY" => Token::KwInputOnly, "INPUT" => Token::KwInputOpt, "OUTPUT" => Token::KwOutputOpt, "UPDATE" => Token::KwUpdateOpt, "DELETE" => Token::KwDeleteOpt, "KEY" => Token::KwKey, "SRCSTMT" => Token::KwSrcStmt, "NODEBUGIO" => Token::KwNodeBugIo, "NOUNREF" => 
Token::KwNoUnRef, "NOSHOWCPY" => Token::KwNoShowCpy, "RESDECPOS" => Token::KwResDecPos, "SNGLVL" => Token::KwSnglLvl, "TERASPACE" => Token::KwTeraSpace, "INHERIT" => Token::KwInherit, "EXTDFT" => Token::KwExtDft, "CL" => Token::KwCl, "DCLCASE" => Token::KwDclCase, "YEARS" => Token::KwYears, "MONTHS" => Token::KwMonths, "DAYS" => Token::KwDays, "HOURS" => Token::KwHours, "MINUTES" => Token::KwMinutes, "SECONDS" => Token::KwSeconds, "MSECONDS" => Token::KwMSeconds, "N" => Token::LitN, _ => Token::Identifier(format!("*{}", word)), }; tokens.push(tok); } else { tokens.push(Token::OpStar); pos += 1; } } continue; } '<' => { if pos + 1 < chars.len() && chars[pos + 1] == '>' { tokens.push(Token::OpNe); pos += 2; } else if pos + 1 < chars.len() && chars[pos + 1] == '=' { tokens.push(Token::OpLe); pos += 2; } else { tokens.push(Token::OpLt); pos += 1; } continue; } '>' => { if pos + 1 < chars.len() && chars[pos + 1] == '=' { tokens.push(Token::OpGe); pos += 2; } else { tokens.push(Token::OpGt); pos += 1; } continue; } '%' => { // Built-in function pos += 1; let start = pos; while pos < chars.len() && (chars[pos].is_alphanumeric() || chars[pos] == '_') { pos += 1; } let name: String = chars[start..pos].iter().collect(); let upper = name.to_uppercase(); let tok = match upper.as_str() { "ABS" => Token::BifAbs, "ADDR" => Token::BifAddr, "ALLOC" => Token::BifAlloc, "BITAND" => Token::BifBitAnd, "BITNOT" => Token::BifBitNot, "BITOR" => Token::BifBitOr, "BITXOR" => Token::BifBitXor, "CHAR" => Token::BifChar, "CHECK" => Token::BifCheck, "CHECKR" => Token::BifCheckR, "DATE" => Token::BifDate, "DAYS" => Token::BifDays, "DEC" => Token::BifDec, "DECH" => Token::BifDecH, "DECPOS" => Token::BifDecPos, "DIFF" => Token::BifDiff, "DIV" => Token::BifDiv, "EDITC" => Token::BifEditC, "EDITFLT" => Token::BifEditFlt, "EDITW" => Token::BifEditW, "ELEM" => Token::BifElem, "EOF" => Token::BifEof, "EQUAL" => Token::BifEqual, "ERROR" => Token::BifError, "FIELDS" => Token::BifFields, "FLOAT" => 
Token::BifFloat, "FOUND" => Token::BifFound, "GRAPH" => Token::BifGraph, "HOURS" => Token::BifHours, "INT" => Token::BifInt, "INTH" => Token::BifIntH, "KDS" => Token::BifKds, "LEN" => Token::BifLen, "MINUTES" => Token::BifMinutes, "MONTHS" => Token::BifMonths, "MSECONDS" => Token::BifMSeconds, "NULLIND" => Token::BifNullInd, "OCCUR" => Token::BifOccur, "OPEN" => Token::BifOpen, "PADDR" => Token::BifPAddr, "PARMS" => Token::BifParms, "REALLOC" => Token::BifReAlloc, "REM" => Token::BifRem, "REPLACE" => Token::BifReplace, "SCAN" => Token::BifScan, "SCANR" => Token::BifScanR, "SECONDS" => Token::BifSeconds, "SHTDN" => Token::BifShtDn, "SIZE" => Token::BifSize, "SQRT" => Token::BifSqrt, "STATUS" => Token::BifStatus, "STR" => Token::BifStr, "SUBARR" => Token::BifSubArr, "SUBST" => Token::BifSubst, "THIS" => Token::BifThis, "TIME" => Token::BifTime, "TIMESTAMP" => Token::BifTimestamp, "TRIM" => Token::BifTrim, "TRIML" => Token::BifTrimL, "TRIMR" => Token::BifTrimR, "UCS2" => Token::BifUcs2, "UNS" => Token::BifUns, "UNSH" => Token::BifUnsH, "XFOOT" => Token::BifXFoot, "XLATE" => Token::BifXLate, "YEARS" => Token::BifYears, _ => Token::Identifier(format!("%{}", name)), }; tokens.push(tok); continue; } _ => {} } // Identifier / keyword — may include hyphens (DCL-S, CTL-OPT, etc.) if chars[pos].is_alphabetic() || chars[pos] == '_' || chars[pos] == '@' || chars[pos] == '#' || chars[pos] == '$' { let start = pos; while pos < chars.len() && (chars[pos].is_alphanumeric() || chars[pos] == '_' || chars[pos] == '-' || chars[pos] == '@' || chars[pos] == '#' || chars[pos] == '$') { // Hyphens are part of compound keywords only — stop at operator context // We include hyphens here and will classify after. // Edge case: we must not swallow the `-` that's an arithmetic minus. // Strategy: include hyphen only if the previous char was alpha/digit // and the next char is also alpha. 
(handled below) if chars[pos] == '-' { if pos + 1 < chars.len() && chars[pos + 1].is_alphabetic() { pos += 1; // include hyphen } else { break; } } else { pos += 1; } } let word: String = chars[start..pos].iter().collect(); let upper = word.to_uppercase(); let tok = keyword_or_ident(&upper, &word); tokens.push(tok); continue; } // Unknown character — skip pos += 1; } tokens.push(Token::Eof); let _ = line; // line tracking available for future per-token storage Ok(tokens) } /// Classify a word (already upper-cased) as a keyword token or identifier. fn keyword_or_ident(upper: &str, original: &str) -> Token { match upper { // Compound declaration keywords "CTL-OPT" => Token::KwCtlOpt, "DCL-S" => Token::KwDclS, "DCL-C" => Token::KwDclC, "DCL-DS" => Token::KwDclDs, "END-DS" => Token::KwEndDs, "DCL-F" => Token::KwDclF, "DCL-PROC" => Token::KwDclProc, "END-PROC" => Token::KwEndProc, "DCL-PI" => Token::KwDclPi, "END-PI" => Token::KwEndPi, "BEG-SR" => Token::KwBegSr, "END-SR" => Token::KwEndSr, "ON-ERROR" => Token::KwOnError, "EVAL-CORR" => Token::KwEvalCorr, // Type keywords "CHAR" => Token::KwChar, "VARCHAR" => Token::KwVarChar, "GRAPH" => Token::KwGraph, "VARGRAPH" => Token::KwVarGraph, "UCS2" => Token::KwUcs2, "VARUCS2" => Token::KwVarUcs2, "INT" => Token::KwInt, "UNS" => Token::KwUns, "FLOAT" => Token::KwFloat, "PACKED" => Token::KwPacked, "ZONED" => Token::KwZoned, "BINDEC" => Token::KwBindec, "IND" => Token::KwInd, "DATE" => Token::KwDate, "TIME" => Token::KwTime, "TIMESTAMP" => Token::KwTimestamp, "POINTER" => Token::KwPointer, "PROCPTR" => Token::KwProcPtr, "LIKE" => Token::KwLike, "LIKEDS" => Token::KwLikeDs, "LIKEREC" => Token::KwLikeRec, "OBJECT" => Token::KwObject, // Variable / struct / param keywords "INZ" => Token::KwInz, "CONST" => Token::KwConst, "VALUE" => Token::KwValue, "STATIC" => Token::KwStatic, "BASED" => Token::KwBased, "DIM" => Token::KwDim, "ASCEND" => Token::KwAscend, "DESCEND" => Token::KwDescend, "ALTSEQ" => Token::KwAltSeq, "OPDESC" => 
Token::KwOpDesc, "NOOPT" => Token::KwNoOpt, "VOLATILE" => Token::KwVolatile, "OPTIONS" => Token::KwOptions, "QUALIFIED" => Token::KwQualified, "TEMPLATE" => Token::KwTemplate, "EXTNAME" => Token::KwExtName, // Proc keywords "EXPORT" => Token::KwExport, "EXTPROC" => Token::KwExtProc, // Control option keywords "NOMAIN" => Token::KwNoMain, "MAIN" => Token::KwMain, "DFTACTGRP" => Token::KwDftActGrp, "ACTGRP" => Token::KwActGrp, "OPTION" => Token::KwOption, "DATFMT" => Token::KwDatFmt, "TIMFMT" => Token::KwTimFmt, "DECEDIT" => Token::KwDecEdit, "ALWNULL" => Token::KwAlwNull, "DEBUG" => Token::KwDebug, "EXPROPTS" => Token::KwExpOpts, "BNDDIR" => Token::KwBndDir, "COPYRIGHT" => Token::KwCopyright, "STGMDL" => Token::KwStgMdl, "TRUNCNBR" => Token::KwTruncNbr, "TEXT" => Token::KwText, // Statement keywords "EVAL" => Token::KwEval, "EVALR" => Token::KwEvalR, "IF" => Token::KwIf, "ELSEIF" => Token::KwElseIf, "ELSE" => Token::KwElse, "ENDIF" => Token::KwEndIf, "DOW" => Token::KwDow, "DOU" => Token::KwDou, "ENDDO" => Token::KwEndDo, "FOR" => Token::KwFor, "TO" => Token::KwTo, "DOWNTO" => Token::KwDownTo, "BY" => Token::KwBy, "ENDFOR" => Token::KwEndFor, "SELECT" => Token::KwSelect, "WHEN" => Token::KwWhen, "OTHER" => Token::KwOther, "ENDSL" => Token::KwEndSl, "MONITOR" => Token::KwMonitor, "ENDMON" => Token::KwEndMon, "RETURN" => Token::KwReturn, "LEAVE" => Token::KwLeave, "ITER" => Token::KwIter, "LEAVESR" => Token::KwLeaveSr, "EXSR" => Token::KwExSr, "CALLP" => Token::KwCallP, "DSPLY" => Token::KwDsply, "RESET" => Token::KwReset, "CLEAR" => Token::KwClear, "SORTA" => Token::KwSortA, "DUMP" => Token::KwDump, "FORCE" => Token::KwForce, "POST" => Token::KwPost, "FEOD" => Token::KwFeod, "UNLOCK" => Token::KwUnlock, "DEALLOC" => Token::KwDeAlloc, // I/O "READ" => Token::KwRead, "READP" => Token::KwReadP, "READE" => Token::KwReadE, "READPE" => Token::KwReadPE, "WRITE" => Token::KwWrite, "UPDATE" => Token::KwUpdate, "DELETE" => Token::KwDelete, "CHAIN" => Token::KwChain, "SETLL" => 
Token::KwSetLL, "SETGT" => Token::KwSetGT, "OPEN" => Token::KwOpen, "CLOSE" => Token::KwClose, "EXCEPT" => Token::KwExcept, "EXFMT" => Token::KwExFmt, "COMMIT" => Token::KwCommit, "ROLLBACK" => Token::KwRollback, // Logical operators "AND" => Token::KwAnd, "OR" => Token::KwOr, "NOT" => Token::KwNot, // Date/time formats (bare keyword versions) "MDY" => Token::KwMdy, "DMY" => Token::KwDmy, "YMD" => Token::KwYmd, "JUL" => Token::KwJul, "ISO" => Token::KwIso, "USA" => Token::KwUsa, "EUR" => Token::KwEur, "JIS" => Token::KwJis, "HMS" => Token::KwHms, _ => Token::Identifier(original.to_string()), } } // ───────────────────────────────────────────────────────────────────────────── // Parser // ───────────────────────────────────────────────────────────────────────────── struct Parser { tokens: Vec, pos: usize, _line: usize, } impl Parser { fn new(tokens: Vec) -> Self { Parser { tokens, pos: 0, _line: 1 } } fn peek(&self) -> &Token { self.tokens.get(self.pos).unwrap_or(&Token::Eof) } fn peek2(&self) -> &Token { self.tokens.get(self.pos + 1).unwrap_or(&Token::Eof) } fn advance(&mut self) -> Token { let tok = self.tokens.get(self.pos).cloned().unwrap_or(Token::Eof); if tok != Token::Eof { self.pos += 1; } tok } fn expect(&mut self, expected: &Token) -> Result<(), LowerError> { let tok = self.advance(); if &tok == expected { Ok(()) } else { Err(LowerError::new(format!( "expected {:?}, got {:?} (token index {})", expected, tok, self.pos ))) } } fn eat(&mut self, expected: &Token) -> bool { if self.peek() == expected { self.advance(); true } else { false } } fn eat_semicolon(&mut self) { self.eat(&Token::Semicolon); } fn is_eof(&self) -> bool { matches!(self.peek(), Token::Eof) } // ── Top-level ────────────────────────────────────────────────────────── fn parse_program(&mut self) -> Result { let mut declarations = Vec::new(); let mut procedures = Vec::new(); let mut skipped_tokens: Vec = Vec::new(); while !self.is_eof() { match self.peek() { Token::KwDclProc => { if 
!skipped_tokens.is_empty() { skipped_tokens.clear(); } match self.parse_procedure() { Ok(p) => procedures.push(p), Err(e) => { eprintln!("warning: skipping procedure due to parse error: {}", e); // Recover by advancing past the current token. self.advance(); } } } Token::KwCtlOpt | Token::KwDclS | Token::KwDclC | Token::KwDclDs | Token::KwDclF | Token::KwBegSr => { if !skipped_tokens.is_empty() { skipped_tokens.clear(); } match self.parse_declaration() { Ok(d) => declarations.push(d), Err(e) => { eprintln!("warning: skipping declaration due to parse error: {}", e); self.advance(); } } } tok => { // Accumulate unrecognised top-level tokens so we can report // them as a meaningful diagnostic. skipped_tokens.push(format!("{:?}", tok)); self.advance(); } } } if !skipped_tokens.is_empty() { eprintln!( "warning: {} unrecognised top-level token(s) were skipped: {}", skipped_tokens.len(), skipped_tokens.join(", ") ); } Ok(Program { declarations, procedures }) } // ── Declarations ─────────────────────────────────────────────────────── fn parse_declaration(&mut self) -> Result { match self.peek().clone() { Token::KwCtlOpt => self.parse_ctl_opt(), Token::KwDclS => self.parse_dcl_s(), Token::KwDclC => self.parse_dcl_c(), Token::KwDclDs => self.parse_dcl_ds(), Token::KwDclF => self.parse_dcl_f(), Token::KwBegSr => self.parse_subroutine(), tok => Err(LowerError::new(format!( "unexpected token in declaration: {:?} — \ expected one of CTL-OPT, DCL-S, DCL-C, DCL-DS, DCL-F, BEG-SR", tok ))), } } fn parse_ctl_opt(&mut self) -> Result { self.advance(); // KwCtlOpt let mut keywords = Vec::new(); while !matches!(self.peek(), Token::Semicolon | Token::Eof) { let kw = self.parse_ctl_keyword(); keywords.push(kw); } self.eat_semicolon(); Ok(Declaration::ControlSpec(ControlSpec { keywords })) } fn parse_ctl_keyword(&mut self) -> CtlKeyword { match self.peek().clone() { Token::KwDftActGrp => { self.advance(); let val = self.parse_star_bool_arg(); CtlKeyword::DftActGrp(val) } Token::KwNoMain 
=> { self.advance(); CtlKeyword::NoMain } Token::KwMain => { self.advance(); let name = self.parse_paren_ident().unwrap_or_default(); CtlKeyword::Main(name) } tok => { // Consume the keyword and any parenthesised argument let s = format!("{:?}", tok); self.advance(); if self.peek() == &Token::LParen { self.skip_paren_group(); } CtlKeyword::Other(s) } } } /// Parse `(*YES)` or `(*NO)` returning a boolean. fn parse_star_bool_arg(&mut self) -> bool { if self.peek() != &Token::LParen { return true; } self.advance(); // ( let result = match self.peek() { Token::KwYes => { self.advance(); true } Token::KwNo => { self.advance(); false } _ => { self.advance(); false } }; self.eat(&Token::RParen); result } fn parse_paren_ident(&mut self) -> Option { if self.peek() != &Token::LParen { return None; } self.advance(); // ( let name = if let Token::Identifier(s) = self.peek().clone() { self.advance(); Some(s) } else { None }; self.eat(&Token::RParen); name } fn parse_dcl_s(&mut self) -> Result { self.advance(); // KwDclS let name = self.expect_name()?; let ty = self.parse_type_spec()?; let mut keywords = Vec::new(); while !matches!(self.peek(), Token::Semicolon | Token::Eof) { keywords.push(self.parse_var_keyword()); } self.eat_semicolon(); Ok(Declaration::Standalone(StandaloneDecl { name, ty, keywords })) } fn parse_dcl_c(&mut self) -> Result { self.advance(); // KwDclC let name = self.expect_name()?; // `DCL-C name CONST(literal)` or `DCL-C name literal` or `DCL-C name *named` match self.peek().clone() { Token::KwConst => { self.advance(); // CONST self.expect(&Token::LParen)?; let lit = self.parse_literal()?; self.expect(&Token::RParen)?; self.eat_semicolon(); Ok(Declaration::Constant(ConstantDecl { name, value: lit })) } Token::KwOn | Token::KwOff | Token::KwBlank | Token::KwBlanks | Token::KwZero | Token::KwZeros | Token::KwHiVal | Token::KwLoVal | Token::KwNull => { let nc = self.parse_named_constant()?; self.eat_semicolon(); 
Ok(Declaration::NamedConstantDecl(NamedConstantDecl { name, value: nc })) } _ => { let lit = self.parse_literal()?; self.eat_semicolon(); Ok(Declaration::Constant(ConstantDecl { name, value: lit })) } } } fn parse_dcl_ds(&mut self) -> Result { self.advance(); // KwDclDs let name = self.expect_name()?; let mut keywords = Vec::new(); // Parse DS keywords (before ';') while !matches!(self.peek(), Token::Semicolon | Token::Eof) { match self.peek().clone() { Token::KwQualified => { self.advance(); keywords.push(DsKeyword::Qualified); } Token::KwTemplate => { self.advance(); keywords.push(DsKeyword::Template); } tok => { let s = format!("{:?}", tok); self.advance(); if self.peek() == &Token::LParen { self.skip_paren_group(); } keywords.push(DsKeyword::Other(s)); } } } self.eat_semicolon(); let mut fields = Vec::new(); // Parse fields until END-DS while !matches!(self.peek(), Token::KwEndDs | Token::Eof) { if let Ok(field) = self.parse_ds_field() { fields.push(field); } else { self.advance(); } } self.eat(&Token::KwEndDs); self.eat_semicolon(); Ok(Declaration::DataStructure(DataStructureDecl { name, keywords, fields })) } fn parse_ds_field(&mut self) -> Result { let name = self.expect_name()?; let ty = self.parse_type_spec()?; let mut keywords = Vec::new(); while !matches!(self.peek(), Token::Semicolon | Token::Eof) { keywords.push(self.parse_var_keyword()); } self.eat_semicolon(); Ok(DsField { name, ty, keywords }) } fn parse_dcl_f(&mut self) -> Result { self.advance(); // KwDclF let name = self.expect_name()?; let mut keywords = Vec::new(); while !matches!(self.peek(), Token::Semicolon | Token::Eof) { let s = format!("{:?}", self.peek().clone()); self.advance(); if self.peek() == &Token::LParen { self.skip_paren_group(); } keywords.push(s); } self.eat_semicolon(); Ok(Declaration::File(FileDecl { name, keywords })) } fn parse_subroutine(&mut self) -> Result { self.advance(); // KwBegSr let name = self.expect_name()?; self.eat_semicolon(); let body = 
self.parse_statement_list(&[Token::KwEndSr]); self.eat(&Token::KwEndSr); self.eat_semicolon(); Ok(Declaration::Subroutine(Subroutine { name, body })) } // ── Type specifiers ──────────────────────────────────────────────────── fn parse_type_spec(&mut self) -> Result { match self.peek().clone() { Token::KwChar | Token::KwVarChar | Token::KwGraph | Token::KwVarGraph | Token::KwUcs2 | Token::KwVarUcs2 => { let is_char = matches!(self.peek(), Token::KwChar); self.advance(); self.expect(&Token::LParen)?; let expr = self.parse_expression()?; self.expect(&Token::RParen)?; if is_char { Ok(TypeSpec::Char(Box::new(expr))) } else { Ok(TypeSpec::VarChar(Box::new(expr))) } } Token::KwInt => { self.advance(); self.expect(&Token::LParen)?; let expr = self.parse_expression()?; self.expect(&Token::RParen)?; Ok(TypeSpec::Int(Box::new(expr))) } Token::KwUns => { self.advance(); self.expect(&Token::LParen)?; let expr = self.parse_expression()?; self.expect(&Token::RParen)?; Ok(TypeSpec::Uns(Box::new(expr))) } Token::KwFloat => { self.advance(); self.expect(&Token::LParen)?; let expr = self.parse_expression()?; self.expect(&Token::RParen)?; Ok(TypeSpec::Float(Box::new(expr))) } Token::KwPacked => { self.advance(); self.expect(&Token::LParen)?; let digits = self.parse_expression()?; self.expect(&Token::Colon)?; let decimals = self.parse_expression()?; self.expect(&Token::RParen)?; Ok(TypeSpec::Packed(Box::new(digits), Box::new(decimals))) } Token::KwZoned => { self.advance(); self.expect(&Token::LParen)?; let digits = self.parse_expression()?; self.expect(&Token::Colon)?; let decimals = self.parse_expression()?; self.expect(&Token::RParen)?; Ok(TypeSpec::Zoned(Box::new(digits), Box::new(decimals))) } Token::KwBindec => { self.advance(); self.expect(&Token::LParen)?; let digits = self.parse_expression()?; self.expect(&Token::Colon)?; let decimals = self.parse_expression()?; self.expect(&Token::RParen)?; Ok(TypeSpec::Bindec(Box::new(digits), Box::new(decimals))) } Token::KwInd => { 
self.advance(); Ok(TypeSpec::Ind) } Token::KwDate => { self.advance(); if self.peek() == &Token::LParen { self.skip_paren_group(); } Ok(TypeSpec::Date) } Token::KwTime => { self.advance(); if self.peek() == &Token::LParen { self.skip_paren_group(); } Ok(TypeSpec::Time) } Token::KwTimestamp => { self.advance(); if self.peek() == &Token::LParen { self.skip_paren_group(); } Ok(TypeSpec::Timestamp) } Token::KwPointer => { self.advance(); Ok(TypeSpec::Pointer) } Token::KwLike => { self.advance(); let name = self.parse_paren_ident().unwrap_or_default(); Ok(TypeSpec::Like(name)) } Token::KwLikeDs => { self.advance(); let name = self.parse_paren_ident().unwrap_or_default(); Ok(TypeSpec::LikeDs(name)) } tok => { Err(LowerError::new(format!("expected type spec, got {:?}", tok))) } } } // ── Variable keyword ────────────────────────────────────────────────── fn parse_var_keyword(&mut self) -> VarKeyword { match self.peek().clone() { Token::KwDim => { self.advance(); // KwDim if self.peek() == &Token::LParen { self.advance(); // ( if let Ok(expr) = self.parse_expression() { self.eat(&Token::RParen); return VarKeyword::Dim(expr); } self.eat(&Token::RParen); } VarKeyword::Other("DIM".to_string()) } Token::KwInz => { self.advance(); if self.peek() == &Token::LParen { self.advance(); // ( // Check for named constant match self.peek().clone() { Token::KwOn | Token::KwOff | Token::KwBlank | Token::KwBlanks | Token::KwZero | Token::KwZeros | Token::KwHiVal | Token::KwLoVal | Token::KwNull => { if let Ok(nc) = self.parse_named_constant() { self.eat(&Token::RParen); return VarKeyword::InzNamed(nc); } } Token::KwExtDft => { self.advance(); self.eat(&Token::RParen); return VarKeyword::Other("INZ(*EXTDFT)".to_string()); } _ => {} } if let Ok(expr) = self.parse_expression() { self.eat(&Token::RParen); return VarKeyword::InzExpr(expr); } self.eat(&Token::RParen); VarKeyword::Inz } else { VarKeyword::Inz } } Token::KwStatic => { self.advance(); VarKeyword::Static } tok => { let s = 
format!("{:?}", tok); self.advance(); if self.peek() == &Token::LParen { self.skip_paren_group(); } VarKeyword::Other(s) } } } // ── Procedure ───────────────────────────────────────────────────────── fn parse_procedure(&mut self) -> Result { self.advance(); // KwDclProc let name = self.expect_name()?; let mut exported = false; let mut kw_tokens = Vec::new(); while !matches!(self.peek(), Token::Semicolon | Token::Eof) { match self.peek().clone() { Token::KwExport => { self.advance(); exported = true; } tok => { kw_tokens.push(format!("{:?}", tok)); self.advance(); if self.peek() == &Token::LParen { self.skip_paren_group(); } } } } self.eat_semicolon(); // Optional DCL-PI let pi = if self.peek() == &Token::KwDclPi { Some(self.parse_pi()?) } else { None }; // Local declarations let mut locals = Vec::new(); loop { match self.peek() { Token::KwDclS | Token::KwDclC | Token::KwDclDs | Token::KwDclF | Token::KwBegSr => { if let Ok(d) = self.parse_declaration() { locals.push(d); } } _ => break, } } // Body statements until END-PROC let body = self.parse_statement_list(&[Token::KwEndProc]); self.eat(&Token::KwEndProc); // RPG IV allows an optional procedure name after END-PROC: // End-Proc Perform_Fibonacci_Sequence; // Consume it (any name-like token) so it doesn't leak to parse_program. let _ = self.try_parse_name(); self.eat_semicolon(); Ok(Procedure { name, exported, pi, locals, body }) } fn parse_pi(&mut self) -> Result { self.advance(); // KwDclPi // name or *N let name = match self.peek().clone() { Token::Identifier(s) => { self.advance(); s } _ => { // Could be *N, or a keyword used as a name, or a real keyword. 
if let Some(n) = token_as_name(self.peek()) { self.advance(); n } else { self.advance(); "*N".to_string() } } }; // optional return type before ';' let return_ty = if !matches!(self.peek(), Token::Semicolon | Token::Eof) { self.parse_type_spec().ok() } else { None }; self.eat_semicolon(); let mut params = Vec::new(); while !matches!(self.peek(), Token::KwEndPi | Token::Eof) { if let Ok(p) = self.parse_pi_param() { params.push(p); } else { self.advance(); } } self.eat(&Token::KwEndPi); self.eat_semicolon(); Ok(PiSpec { name, return_ty, params }) } fn parse_pi_param(&mut self) -> Result { let name = self.expect_name()?; let ty = self.parse_type_spec()?; let mut keywords = Vec::new(); while !matches!(self.peek(), Token::Semicolon | Token::Eof) { match self.peek().clone() { Token::KwValue => { self.advance(); keywords.push(ParamKeyword::Value); } Token::KwConst => { self.advance(); keywords.push(ParamKeyword::Const); } tok => { let s = format!("{:?}", tok); self.advance(); if self.peek() == &Token::LParen { self.skip_paren_group(); } keywords.push(ParamKeyword::Other(s)); } } } self.eat_semicolon(); Ok(PiParam { name, ty, keywords }) } // ── Statement list ───────────────────────────────────────────────────── fn parse_statement_list(&mut self, terminators: &[Token]) -> Vec { let mut stmts = Vec::new(); loop { if self.is_eof() { break; } if terminators.iter().any(|t| t == self.peek()) { break; } // Also stop at certain keywords that signal end-of-block if matches!(self.peek(), Token::KwElse | Token::KwElseIf | Token::KwEndIf | Token::KwEndDo | Token::KwEndFor | Token::KwEndSl | Token::KwEndMon | Token::KwWhen | Token::KwOther | Token::KwOnError | Token::KwEndSr ) { break; } match self.parse_statement() { Ok(s) => stmts.push(s), Err(_) => { self.recover_to_semicolon(); } } } stmts } fn recover_to_semicolon(&mut self) { while !matches!(self.peek(), Token::Semicolon | Token::Eof) { self.advance(); } self.eat(&Token::Semicolon); } // ── Individual statements 
────────────────────────────────────────────── fn parse_statement(&mut self) -> Result { match self.peek().clone() { Token::KwReturn => self.parse_return(), Token::KwLeave => { self.advance(); self.eat_semicolon(); Ok(Statement::Leave) } Token::KwIter => { self.advance(); self.eat_semicolon(); Ok(Statement::Iter) } Token::KwLeaveSr => { self.advance(); self.eat_semicolon(); Ok(Statement::LeaveSr) } Token::KwExSr => self.parse_exsr(), Token::KwDsply => self.parse_dsply(), Token::KwIf => self.parse_if(), Token::KwDow => self.parse_dow(), Token::KwDou => self.parse_dou(), Token::KwFor => self.parse_for(), Token::KwSelect => self.parse_select(), Token::KwMonitor => self.parse_monitor(), Token::KwEval | Token::KwEvalR | Token::KwEvalCorr => self.parse_assign_eval(), Token::KwCallP => self.parse_callp(), Token::KwClear => self.parse_clear(), Token::KwReset => self.parse_reset(), Token::KwDump => { self.advance(); if self.peek() == &Token::LParen { self.skip_paren_group(); } self.eat_semicolon(); Ok(Statement::Unimplemented("DUMP".into())) } Token::KwSortA => { self.advance(); let _ = self.expect_ident(); self.eat_semicolon(); Ok(Statement::Unimplemented("SORTA".into())) } Token::KwDeAlloc => { self.advance(); if self.peek() == &Token::LParen { self.skip_paren_group(); } let _ = self.expect_ident(); self.eat_semicolon(); Ok(Statement::Unimplemented("DEALLOC".into())) } Token::KwForce | Token::KwPost | Token::KwFeod | Token::KwUnlock => { let kw = format!("{:?}", self.advance()); let _ = self.expect_ident(); self.eat_semicolon(); Ok(Statement::Unimplemented(kw)) } // I/O statements Token::KwRead => self.parse_read(), Token::KwReadP => self.parse_readp(), Token::KwReadE => self.parse_reade(), Token::KwReadPE => self.parse_readpe(), Token::KwWrite => self.parse_write(), Token::KwUpdate => self.parse_update(), Token::KwDelete => self.parse_delete(), Token::KwChain => self.parse_chain(), Token::KwSetLL => self.parse_setll(), Token::KwSetGT => self.parse_setgt(), Token::KwOpen 
=> self.parse_open(), Token::KwClose => self.parse_close(), Token::KwExcept => self.parse_except(), Token::KwExFmt => self.parse_exfmt(), Token::KwCommit | Token::KwRollback => { let kw = format!("{:?}", self.advance()); self.eat_semicolon(); Ok(Statement::Unimplemented(kw)) } Token::Identifier(_) => { // Could be: implicit CALLP `name(args);` or assignment `name = expr;` self.parse_ident_stmt() } tok => { Err(LowerError::new(format!("unexpected statement token: {:?}", tok))) } } } fn parse_return(&mut self) -> Result { self.advance(); // KwReturn if self.peek() == &Token::Semicolon { self.advance(); return Ok(Statement::Return(ReturnStmt { value: None })); } let expr = self.parse_expression().ok(); self.eat_semicolon(); Ok(Statement::Return(ReturnStmt { value: expr })) } fn parse_exsr(&mut self) -> Result { self.advance(); // KwExSr let name = self.expect_name()?; self.eat_semicolon(); Ok(Statement::ExSr(name)) } fn parse_dsply(&mut self) -> Result { self.advance(); // KwDsply // Two forms: // DSPLY expr; // DSPLY (expr : msgq : response); if self.peek() == &Token::LParen { // peek ahead — if the next token after '(' looks like an expression // followed by ':' it's the three-arg form self.advance(); // ( let expr = self.parse_expression()?; let mut msg_q = None; let mut response = None; if self.eat(&Token::Colon) { if let Token::Identifier(s) = self.peek().clone() { self.advance(); msg_q = Some(s); } else { self.eat(&Token::Colon); } if self.eat(&Token::Colon) { if let Token::Identifier(s) = self.peek().clone() { self.advance(); response = Some(s); } } } self.eat(&Token::RParen); self.eat_semicolon(); Ok(Statement::Dsply(DsplyStmt { expr, msg_q, response })) } else { let expr = self.parse_expression()?; self.eat_semicolon(); Ok(Statement::Dsply(DsplyStmt { expr, msg_q: None, response: None })) } } fn parse_if(&mut self) -> Result { self.advance(); // KwIf let condition = self.parse_expression()?; self.eat_semicolon(); let then_body = self.parse_statement_list(&[ 
Token::KwElseIf, Token::KwElse, Token::KwEndIf, ]); let mut elseifs = Vec::new(); while self.peek() == &Token::KwElseIf { self.advance(); let cond = self.parse_expression()?; self.eat_semicolon(); let body = self.parse_statement_list(&[ Token::KwElseIf, Token::KwElse, Token::KwEndIf, ]); elseifs.push(ElseIf { condition: cond, body }); } let else_body = if self.eat(&Token::KwElse) { self.eat_semicolon(); Some(self.parse_statement_list(&[Token::KwEndIf])) } else { None }; self.eat(&Token::KwEndIf); self.eat_semicolon(); Ok(Statement::If(IfStmt { condition, then_body, elseifs, else_body })) } fn parse_dow(&mut self) -> Result { self.advance(); // KwDow let condition = self.parse_expression()?; self.eat_semicolon(); let body = self.parse_statement_list(&[Token::KwEndDo]); self.eat(&Token::KwEndDo); self.eat_semicolon(); Ok(Statement::DoWhile(DoWhileStmt { condition, body })) } fn parse_dou(&mut self) -> Result { self.advance(); // KwDou let condition = self.parse_expression()?; self.eat_semicolon(); let body = self.parse_statement_list(&[Token::KwEndDo]); self.eat(&Token::KwEndDo); self.eat_semicolon(); Ok(Statement::DoUntil(DoUntilStmt { condition, body })) } fn parse_for(&mut self) -> Result { self.advance(); // KwFor let var = self.expect_ident()?; self.expect(&Token::OpEq)?; let start = self.parse_expression()?; let downto = if self.eat(&Token::KwDownTo) { true } else { self.eat(&Token::KwTo); false }; let limit = self.parse_expression()?; let step = if self.eat(&Token::KwBy) { self.parse_expression().ok() } else { None }; self.eat_semicolon(); let body = self.parse_statement_list(&[Token::KwEndFor]); self.eat(&Token::KwEndFor); self.eat_semicolon(); Ok(Statement::For(ForStmt { var, start, limit, step, downto, body })) } fn parse_select(&mut self) -> Result { self.advance(); // KwSelect self.eat_semicolon(); let mut whens = Vec::new(); while self.peek() == &Token::KwWhen { self.advance(); let cond = self.parse_expression()?; self.eat_semicolon(); let body = 
self.parse_statement_list(&[Token::KwWhen, Token::KwOther, Token::KwEndSl]); whens.push(WhenClause { condition: cond, body }); } let other = if self.eat(&Token::KwOther) { self.eat_semicolon(); Some(self.parse_statement_list(&[Token::KwEndSl])) } else { None }; self.eat(&Token::KwEndSl); self.eat_semicolon(); Ok(Statement::Select(SelectStmt { whens, other })) } fn parse_monitor(&mut self) -> Result { self.advance(); // KwMonitor self.eat_semicolon(); let body = self.parse_statement_list(&[Token::KwOnError]); let mut handlers = Vec::new(); while self.peek() == &Token::KwOnError { self.advance(); let mut codes = Vec::new(); while !matches!(self.peek(), Token::Semicolon | Token::Eof) { match self.peek().clone() { Token::IntLit(n) => { self.advance(); codes.push(ErrorCode::Integer(n as u32)); } Token::KwProgram => { self.advance(); codes.push(ErrorCode::Program); } Token::KwFile => { self.advance(); codes.push(ErrorCode::File); } Token::KwAll => { self.advance(); codes.push(ErrorCode::All); } Token::Colon => { self.advance(); } _ => { self.advance(); } } } self.eat_semicolon(); let hbody = self.parse_statement_list(&[Token::KwOnError, Token::KwEndMon]); handlers.push(OnError { codes, body: hbody }); } self.eat(&Token::KwEndMon); self.eat_semicolon(); Ok(Statement::Monitor(MonitorStmt { body, handlers })) } fn parse_assign_eval(&mut self) -> Result { self.advance(); // EVAL / EVALR / EVAL-CORR // Optional (H/T/E) option if self.peek() == &Token::LParen { self.skip_paren_group(); } let target = self.parse_lvalue()?; self.expect(&Token::OpEq)?; let value = self.parse_expression()?; self.eat_semicolon(); Ok(Statement::Assign(AssignStmt { target, value })) } fn parse_callp(&mut self) -> Result { self.advance(); // KwCallP let name = self.expect_name()?; let args = if self.peek() == &Token::LParen { self.parse_arg_list()? 
} else { Vec::new() }; self.eat_semicolon(); Ok(Statement::CallP(CallPStmt { name, args })) } fn parse_clear(&mut self) -> Result { self.advance(); // KwClear let lv = self.parse_lvalue()?; self.eat_semicolon(); Ok(Statement::Clear(lv)) } fn parse_reset(&mut self) -> Result { self.advance(); // KwReset if self.eat(&Token::KwAll) { self.eat_semicolon(); return Ok(Statement::Reset(ResetStmt::All)); } let lv = self.parse_lvalue()?; self.eat_semicolon(); Ok(Statement::Reset(ResetStmt::Target(lv))) } // ── I/O statements ───────────────────────────────────────────────────── fn maybe_parse_io_option(&mut self) -> bool { if self.peek() == &Token::LParen { // Could be (E) or (N) let saved = self.pos; self.advance(); // ( let ok = matches!(self.peek(), Token::LitE | Token::LitN | Token::Identifier(_)); if ok { self.advance(); // option letter self.eat(&Token::RParen); true } else { self.pos = saved; false } } else { false } } fn parse_read(&mut self) -> Result { self.advance(); self.maybe_parse_io_option(); let file = self.expect_ident()?; let _ = self.try_parse_ident(); // optional indicator var self.eat_semicolon(); Ok(Statement::Io(IoStatement::Read { file })) } fn parse_readp(&mut self) -> Result { self.advance(); self.maybe_parse_io_option(); let file = self.expect_ident()?; self.eat_semicolon(); Ok(Statement::Io(IoStatement::ReadP { file })) } fn parse_reade(&mut self) -> Result { self.advance(); self.maybe_parse_io_option(); let key = self.parse_expression()?; let file = self.expect_ident()?; self.eat_semicolon(); // Treat as plain READ for now let _ = key; Ok(Statement::Io(IoStatement::Read { file })) } fn parse_readpe(&mut self) -> Result { self.advance(); self.maybe_parse_io_option(); let key = self.parse_expression()?; let file = self.expect_ident()?; self.eat_semicolon(); let _ = key; Ok(Statement::Io(IoStatement::ReadP { file })) } fn parse_write(&mut self) -> Result { self.advance(); self.maybe_parse_io_option(); let record = self.expect_ident()?; 
self.eat_semicolon(); Ok(Statement::Io(IoStatement::Write { record })) } fn parse_update(&mut self) -> Result { self.advance(); self.maybe_parse_io_option(); let record = self.expect_ident()?; self.eat_semicolon(); Ok(Statement::Io(IoStatement::Update { record })) } fn parse_delete(&mut self) -> Result { self.advance(); self.maybe_parse_io_option(); let key = self.parse_expression()?; let file = self.expect_ident()?; self.eat_semicolon(); Ok(Statement::Io(IoStatement::Delete { key, file })) } fn parse_chain(&mut self) -> Result { self.advance(); self.maybe_parse_io_option(); let key = self.parse_expression()?; let file = self.expect_ident()?; self.eat_semicolon(); Ok(Statement::Io(IoStatement::Chain { key, file })) } fn parse_setll(&mut self) -> Result { self.advance(); let key = match self.peek().clone() { Token::KwStart => { self.advance(); SetKey::Start } Token::KwEnd => { self.advance(); SetKey::End } _ => SetKey::Expr(self.parse_expression()?), }; let file = self.expect_ident()?; self.eat_semicolon(); Ok(Statement::Io(IoStatement::SetLL { key, file })) } fn parse_setgt(&mut self) -> Result { self.advance(); let key = match self.peek().clone() { Token::KwStart => { self.advance(); SetKey::Start } Token::KwEnd => { self.advance(); SetKey::End } _ => SetKey::Expr(self.parse_expression()?), }; let file = self.expect_ident()?; self.eat_semicolon(); Ok(Statement::Io(IoStatement::SetGT { key, file })) } fn parse_open(&mut self) -> Result { self.advance(); self.maybe_parse_io_option(); let file = self.expect_ident()?; self.eat_semicolon(); Ok(Statement::Io(IoStatement::Open { file })) } fn parse_close(&mut self) -> Result { self.advance(); self.maybe_parse_io_option(); if self.eat(&Token::KwAll) { self.eat_semicolon(); return Ok(Statement::Io(IoStatement::Close { file: None })); } let file = self.expect_ident()?; self.eat_semicolon(); Ok(Statement::Io(IoStatement::Close { file: Some(file) })) } fn parse_except(&mut self) -> Result { self.advance(); let fmt = 
self.try_parse_ident(); self.eat_semicolon(); Ok(Statement::Io(IoStatement::Except { format: fmt })) } fn parse_exfmt(&mut self) -> Result { self.advance(); self.maybe_parse_io_option(); let format = self.expect_ident()?; self.eat_semicolon(); Ok(Statement::Io(IoStatement::ExFmt { format })) } // ── Identifier statement (assignment or implicit call) ───────────────── fn parse_ident_stmt(&mut self) -> Result { let name = self.expect_name()?; // Could be `name(args);` — an implicit procedure call if self.peek() == &Token::LParen { // Peek ahead to decide: call or subscript-assignment? // If after the matching ')' we see '=' it's an assignment, else call. // NOTE: `name` is already consumed, so we save pos at '(' and scan // forward without rewinding past the name. let saved = self.pos; self.advance(); // ( let mut depth = 1; while depth > 0 && !self.is_eof() { match self.peek() { Token::LParen => { self.advance(); depth += 1; } Token::RParen => { self.advance(); depth -= 1; } _ => { self.advance(); } } } let is_assign = self.peek() == &Token::OpEq; self.pos = saved; // rewind to '(' if is_assign { // subscript assignment: `name(idx) = expr;` // Build LValue directly using the already-consumed `name` // instead of calling parse_lvalue() (which would try to // re-consume the name from the current position which is '('). 
let qname = QualifiedName::simple(name.clone()); let mut indices = Vec::new(); self.advance(); // consume '(' indices.push(self.parse_expression()?); while self.eat(&Token::Colon) { indices.push(self.parse_expression()?); } self.eat(&Token::RParen); let lv = LValue::Index(qname, indices); self.expect(&Token::OpEq)?; let value = self.parse_expression()?; self.eat_semicolon(); return Ok(Statement::Assign(AssignStmt { target: lv, value })); } else { // implicit call let args = self.parse_arg_list()?; self.eat_semicolon(); return Ok(Statement::CallP(CallPStmt { name, args })); } } // qualified name assignment: `name.field = expr;` if self.peek() == &Token::Dot { let mut parts = vec![name]; while self.eat(&Token::Dot) { parts.push(self.expect_ident()?); } let qname = QualifiedName { parts }; // subscript? let lv = if self.peek() == &Token::LParen { let mut indices = Vec::new(); self.advance(); indices.push(self.parse_expression()?); while self.eat(&Token::Colon) { indices.push(self.parse_expression()?); } self.eat(&Token::RParen); LValue::Index(qname, indices) } else { LValue::Name(qname) }; self.expect(&Token::OpEq)?; let value = self.parse_expression()?; self.eat_semicolon(); return Ok(Statement::Assign(AssignStmt { target: lv, value })); } // Plain `name = expr;` if self.peek() == &Token::OpEq { self.advance(); // = let value = self.parse_expression()?; self.eat_semicolon(); let lv = LValue::Name(QualifiedName::simple(name)); return Ok(Statement::Assign(AssignStmt { target: lv, value })); } Err(LowerError::new(format!("cannot parse statement starting with identifier '{}'", name))) } // ── L-values ─────────────────────────────────────────────────────────── fn parse_lvalue(&mut self) -> Result { let name = self.expect_name()?; let mut parts = vec![name]; while self.eat(&Token::Dot) { parts.push(self.expect_name()?); } let qname = QualifiedName { parts }; if self.peek() == &Token::LParen { let mut indices = Vec::new(); self.advance(); 
indices.push(self.parse_expression()?); while self.eat(&Token::Colon) { indices.push(self.parse_expression()?); } self.eat(&Token::RParen); Ok(LValue::Index(qname, indices)) } else { Ok(LValue::Name(qname)) } } // ── Expressions ──────────────────────────────────────────────────────── fn parse_expression(&mut self) -> Result { self.parse_or_expr() } fn parse_or_expr(&mut self) -> Result { let mut lhs = self.parse_and_expr()?; while self.eat(&Token::KwOr) { let rhs = self.parse_and_expr()?; lhs = Expression::BinOp(BinOp::Or, Box::new(lhs), Box::new(rhs)); } Ok(lhs) } fn parse_and_expr(&mut self) -> Result { let mut lhs = self.parse_not_expr()?; while self.eat(&Token::KwAnd) { let rhs = self.parse_not_expr()?; lhs = Expression::BinOp(BinOp::And, Box::new(lhs), Box::new(rhs)); } Ok(lhs) } fn parse_not_expr(&mut self) -> Result { if self.eat(&Token::KwNot) { let expr = self.parse_comparison_expr()?; return Ok(Expression::Not(Box::new(expr))); } self.parse_comparison_expr() } fn parse_comparison_expr(&mut self) -> Result { let lhs = self.parse_additive_expr()?; let op = match self.peek() { Token::OpEq => BinOp::Eq, Token::OpNe => BinOp::Ne, Token::OpLt => BinOp::Lt, Token::OpLe => BinOp::Le, Token::OpGt => BinOp::Gt, Token::OpGe => BinOp::Ge, _ => return Ok(lhs), }; self.advance(); let rhs = self.parse_additive_expr()?; Ok(Expression::BinOp(op, Box::new(lhs), Box::new(rhs))) } fn parse_additive_expr(&mut self) -> Result { let mut lhs = self.parse_multiplicative_expr()?; loop { let op = match self.peek() { Token::OpPlus => BinOp::Add, Token::OpMinus => BinOp::Sub, _ => break, }; self.advance(); let rhs = self.parse_multiplicative_expr()?; lhs = Expression::BinOp(op, Box::new(lhs), Box::new(rhs)); } Ok(lhs) } fn parse_multiplicative_expr(&mut self) -> Result { let mut lhs = self.parse_unary_expr()?; loop { let op = match self.peek() { Token::OpStar2 => BinOp::Pow, Token::OpStar => BinOp::Mul, Token::OpSlash => BinOp::Div, _ => break, }; self.advance(); let rhs = 
self.parse_unary_expr()?; lhs = Expression::BinOp(op, Box::new(lhs), Box::new(rhs)); } Ok(lhs) } fn parse_unary_expr(&mut self) -> Result { if self.eat(&Token::OpMinus) { let e = self.parse_primary_expr()?; return Ok(Expression::UnaryMinus(Box::new(e))); } if self.eat(&Token::OpPlus) { let e = self.parse_primary_expr()?; return Ok(Expression::UnaryPlus(Box::new(e))); } self.parse_primary_expr() } fn parse_primary_expr(&mut self) -> Result { match self.peek().clone() { // Parenthesised expression Token::LParen => { self.advance(); let e = self.parse_expression()?; self.eat(&Token::RParen); Ok(Expression::Paren(Box::new(e))) } // String literal Token::StringLit(s) => { self.advance(); Ok(Expression::Literal(Literal::String(s))) } // Integer literal Token::IntLit(n) => { self.advance(); Ok(Expression::Literal(Literal::Integer(n))) } // Float literal Token::FloatLit(f) => { self.advance(); Ok(Expression::Literal(Literal::Float(f))) } // Hex literal Token::HexLit(h) => { self.advance(); let bytes = hex_to_bytes(&h); Ok(Expression::Literal(Literal::Hex(bytes))) } // Named constants Token::KwOn | Token::KwOff | Token::KwBlank | Token::KwBlanks | Token::KwZero | Token::KwZeros | Token::KwHiVal | Token::KwLoVal | Token::KwNull => { let nc = self.parse_named_constant()?; Ok(Expression::Named(nc)) } // *IN(n) special Token::KwIn2 => { self.advance(); if self.peek() == &Token::LParen { self.advance(); let e = self.parse_expression()?; self.eat(&Token::RParen); Ok(Expression::Special(SpecialValue::In(Box::new(e)))) } else { Ok(Expression::Special(SpecialValue::InAll)) } } // Other special values Token::KwStart => { self.advance(); Ok(Expression::Special(SpecialValue::Start)) } Token::KwEnd => { self.advance(); Ok(Expression::Special(SpecialValue::End)) } Token::KwOmit => { self.advance(); Ok(Expression::Special(SpecialValue::Omit)) } Token::KwThis => { self.advance(); Ok(Expression::Special(SpecialValue::This)) } Token::KwSame => { self.advance(); 
Ok(Expression::Special(SpecialValue::Same)) } Token::KwAll => { self.advance(); // *ALL'str' if let Token::StringLit(s) = self.peek().clone() { self.advance(); Ok(Expression::Special(SpecialValue::All(s))) } else { Ok(Expression::Special(SpecialValue::Blanks)) } } // Built-in functions tok if is_bif_token(&tok) => { self.parse_builtin_expr() } // Identifier (or keyword used as a name) — variable reference, // qualified name, or call. ref tok if token_as_name(tok).is_some() => { let name = token_as_name(self.peek()).unwrap(); self.advance(); // Qualified name (dots)? let mut parts = vec![name.clone()]; while self.eat(&Token::Dot) { if let Some(s) = self.try_parse_ident_or_name() { parts.push(s); } else { break; } } let qname = QualifiedName { parts }; // Call `name(args)` or subscript `name(idx)`? if self.peek() == &Token::LParen && qname.is_simple() { let args = self.parse_arg_list()?; Ok(Expression::Call(name, args)) } else if self.peek() == &Token::LParen { let mut indices = Vec::new(); self.advance(); indices.push(self.parse_expression()?); while self.eat(&Token::Colon) { indices.push(self.parse_expression()?); } self.eat(&Token::RParen); Ok(Expression::Index(qname, indices)) } else { Ok(Expression::Variable(qname)) } } tok => { Err(LowerError::new(format!("unexpected token in expression: {:?}", tok))) } } } fn parse_named_constant(&mut self) -> Result { let nc = match self.peek() { Token::KwOn => NamedConstant::On, Token::KwOff => NamedConstant::Off, Token::KwBlank => NamedConstant::Blank, Token::KwBlanks => NamedConstant::Blanks, Token::KwZero => NamedConstant::Zero, Token::KwZeros => NamedConstant::Zeros, Token::KwHiVal => NamedConstant::HiVal, Token::KwLoVal => NamedConstant::LoVal, Token::KwNull => NamedConstant::Null, tok => return Err(LowerError::new(format!("expected named constant, got {:?}", tok))), }; self.advance(); Ok(nc) } fn parse_literal(&mut self) -> Result { match self.peek().clone() { Token::StringLit(s) => { self.advance(); 
Ok(Literal::String(s)) } Token::IntLit(n) => { self.advance(); Ok(Literal::Integer(n)) } Token::FloatLit(f) => { self.advance(); Ok(Literal::Float(f)) } Token::HexLit(h) => { self.advance(); Ok(Literal::Hex(hex_to_bytes(&h))) } Token::KwOn => { self.advance(); Ok(Literal::Indicator(true)) } Token::KwOff => { self.advance(); Ok(Literal::Indicator(false)) } tok => Err(LowerError::new(format!("expected literal, got {:?}", tok))), } } fn parse_builtin_expr(&mut self) -> Result { let bif_tok = self.advance(); self.expect(&Token::LParen).map_err(|e| LowerError::new(format!( "built-in function {:?}: {}", bif_tok, e.message )))?; let bif = match bif_tok { Token::BifLen => { let e = self.parse_expression()?; self.eat(&Token::RParen); BuiltIn::Len(Box::new(e)) } Token::BifTrim => { let e = self.parse_expression()?; self.eat(&Token::RParen); BuiltIn::Trim(Box::new(e)) } Token::BifTrimL => { let e = self.parse_expression()?; self.eat(&Token::RParen); BuiltIn::TrimL(Box::new(e)) } Token::BifTrimR => { let e = self.parse_expression()?; self.eat(&Token::RParen); BuiltIn::TrimR(Box::new(e)) } Token::BifChar => { let e = self.parse_expression()?; self.eat(&Token::RParen); BuiltIn::Char(Box::new(e)) } Token::BifInt => { let e = self.parse_expression()?; self.eat(&Token::RParen); BuiltIn::Int(Box::new(e)) } Token::BifAbs => { let e = self.parse_expression()?; self.eat(&Token::RParen); BuiltIn::Abs(Box::new(e)) } Token::BifSqrt => { let e = self.parse_expression()?; self.eat(&Token::RParen); BuiltIn::Sqrt(Box::new(e)) } Token::BifEof => { let name = self.try_parse_ident(); self.eat(&Token::RParen); BuiltIn::Eof(name) } Token::BifFound => { let name = self.try_parse_ident(); self.eat(&Token::RParen); BuiltIn::Found(name) } Token::BifError => { self.eat(&Token::RParen); BuiltIn::Error } Token::BifElem => { let e = self.parse_expression()?; self.eat(&Token::RParen); BuiltIn::Elem(Box::new(e)) } Token::BifSize => { let e = self.parse_expression()?; self.eat(&Token::RParen); 
BuiltIn::Size(Box::new(e)) } Token::BifAddr => { let e = self.parse_expression()?; self.eat(&Token::RParen); BuiltIn::Addr(Box::new(e)) } Token::BifAlloc => { let e = self.parse_expression()?; self.eat(&Token::RParen); BuiltIn::Alloc(Box::new(e)) } Token::BifRem => { let a = self.parse_expression()?; self.eat(&Token::Colon); let b = self.parse_expression()?; self.eat(&Token::RParen); BuiltIn::Rem(Box::new(a), Box::new(b)) } Token::BifDiv => { let a = self.parse_expression()?; self.eat(&Token::Colon); let b = self.parse_expression()?; self.eat(&Token::RParen); BuiltIn::Div(Box::new(a), Box::new(b)) } Token::BifDec => { let e = self.parse_expression()?; self.eat(&Token::Colon); let d = self.parse_expression()?; self.eat(&Token::Colon); let f = self.parse_expression()?; self.eat(&Token::RParen); BuiltIn::Dec(Box::new(e), Box::new(d), Box::new(f)) } Token::BifSubst => { let s = self.parse_expression()?; self.eat(&Token::Colon); let start = self.parse_expression()?; let len = if self.eat(&Token::Colon) { Some(Box::new(self.parse_expression()?)) } else { None }; self.eat(&Token::RParen); BuiltIn::Subst(Box::new(s), Box::new(start), len) } Token::BifScan => { let p = self.parse_expression()?; self.eat(&Token::Colon); let src = self.parse_expression()?; let start = if self.eat(&Token::Colon) { Some(Box::new(self.parse_expression()?)) } else { None }; self.eat(&Token::RParen); BuiltIn::Scan(Box::new(p), Box::new(src), start) } tok => { // Generic BIF: collect all arguments let mut args = Vec::new(); while !matches!(self.peek(), Token::RParen | Token::Eof) { if let Ok(e) = self.parse_expression() { args.push(e); } else { self.advance(); } if !self.eat(&Token::Colon) { break; } } self.eat(&Token::RParen); BuiltIn::Other(format!("{:?}", tok), args) } }; Ok(Expression::BuiltIn(bif)) } // ── Argument lists ───────────────────────────────────────────────────── fn parse_arg_list(&mut self) -> Result, LowerError> { self.expect(&Token::LParen)?; let mut args = Vec::new(); if 
self.peek() == &Token::RParen { self.advance(); return Ok(args); } loop { if self.eat(&Token::KwOmit) { args.push(Arg::Omit); } else { let e = self.parse_expression()?; args.push(Arg::Expr(e)); } if !self.eat(&Token::Colon) { break; } } self.eat(&Token::RParen); Ok(args) } // ── Helpers ──────────────────────────────────────────────────────────── fn expect_ident(&mut self) -> Result { match self.advance() { Token::Identifier(s) => Ok(s), tok => Err(LowerError::new(format!("expected identifier, got {:?}", tok))), } } /// Like `expect_ident` but also accepts keyword tokens as names. /// /// RPG IV procedure names and variable names can collide with keywords /// (e.g. `main`, `read`, `write`, `open`, `close`, `date`, `time`). /// This helper converts any single-word keyword token back to its string /// representation so it can be used as an identifier. fn expect_name(&mut self) -> Result { let tok = self.advance(); let s = token_as_name(&tok); if let Some(name) = s { Ok(name) } else { Err(LowerError::new(format!("expected name, got {:?}", tok))) } } fn try_parse_name(&mut self) -> Option { let s = token_as_name(self.peek())?; self.advance(); Some(s) } fn try_parse_ident(&mut self) -> Option { if let Token::Identifier(s) = self.peek().clone() { self.advance(); Some(s) } else { None } } /// Try to parse an identifier OR a keyword-as-name (like `main`, `read`). fn try_parse_ident_or_name(&mut self) -> Option { // Prefer the strict identifier form first. 
if let Token::Identifier(s) = self.peek().clone() { self.advance(); return Some(s); } self.try_parse_name() } fn skip_paren_group(&mut self) { if self.peek() != &Token::LParen { return; } self.advance(); let mut depth = 1; while depth > 0 && !self.is_eof() { match self.peek() { Token::LParen => { self.advance(); depth += 1; } Token::RParen => { self.advance(); depth -= 1; } _ => { self.advance(); } } } } } // ───────────────────────────────────────────────────────────────────────────── // Helper functions // ───────────────────────────────────────────────────────────────────────────── /// Try to interpret any single-word token as a plain name string. /// /// This is needed because RPG IV procedure names and variable names can /// collide with keywords (e.g. `main`, `read`, `time`, `date`, `open`). /// Identifiers always win; for keyword tokens we return the canonical /// lowercase or mixed-case spelling that the source would have used. fn token_as_name(tok: &Token) -> Option { match tok { Token::Identifier(s) => Some(s.clone()), // Statement / declaration keywords that are commonly used as names. 
Token::KwMain => Some("main".into()), Token::KwRead => Some("read".into()), Token::KwWrite => Some("write".into()), Token::KwOpen => Some("open".into()), Token::KwClose => Some("close".into()), Token::KwDelete => Some("delete".into()), Token::KwUpdate => Some("update".into()), Token::KwDate => Some("date".into()), Token::KwTime => Some("time".into()), Token::KwTimestamp => Some("timestamp".into()), Token::KwChar => Some("char".into()), Token::KwInt => Some("int".into()), Token::KwFloat => Some("float".into()), Token::KwInd => Some("ind".into()), Token::KwPointer => Some("pointer".into()), Token::KwText => Some("text".into()), Token::KwOption => Some("option".into()), Token::KwExport => Some("export".into()), Token::KwForce => Some("force".into()), Token::KwPost => Some("post".into()), Token::KwFeod => Some("feod".into()), Token::KwUnlock => Some("unlock".into()), Token::KwSortA => Some("sorta".into()), Token::KwDump => Some("dump".into()), Token::KwReset => Some("reset".into()), Token::KwClear => Some("clear".into()), Token::KwLeave => Some("leave".into()), Token::KwIter => Some("iter".into()), Token::KwReturn => Some("return".into()), Token::KwSelect => Some("select".into()), Token::KwWhen => Some("when".into()), Token::KwOther => Some("other".into()), Token::KwMonitor => Some("monitor".into()), Token::KwFor => Some("for".into()), Token::KwTo => Some("to".into()), Token::KwBy => Some("by".into()), Token::KwDownTo => Some("downto".into()), Token::KwDsply => Some("dsply".into()), Token::KwCallP => Some("callp".into()), Token::KwExSr => Some("exsr".into()), Token::KwExFmt => Some("exfmt".into()), Token::KwExcept => Some("except".into()), Token::KwChain => Some("chain".into()), Token::KwSetLL => Some("setll".into()), Token::KwSetGT => Some("setgt".into()), Token::KwReadP => Some("readp".into()), Token::KwReadE => Some("reade".into()), Token::KwReadPE => Some("readpe".into()), Token::KwCommit => Some("commit".into()), Token::KwRollback => Some("rollback".into()), 
Token::KwDeAlloc => Some("dealloc".into()), Token::KwIf => Some("if".into()), Token::KwElse => Some("else".into()), Token::KwElseIf => Some("elseif".into()), Token::KwEndIf => Some("endif".into()), Token::KwDow => Some("dow".into()), Token::KwDou => Some("dou".into()), Token::KwEndDo => Some("enddo".into()), Token::KwEndFor => Some("endfor".into()), Token::KwEndSl => Some("endsl".into()), Token::KwEndMon => Some("endmon".into()), Token::KwEval => Some("eval".into()), Token::KwEvalR => Some("evalr".into()), Token::KwEvalCorr => Some("eval-corr".into()), Token::KwLeaveSr => Some("leavesr".into()), Token::KwNoMain => Some("nomain".into()), Token::KwDftActGrp => Some("dftactgrp".into()), Token::KwActGrp => Some("actgrp".into()), Token::KwBndDir => Some("bnddir".into()), Token::KwCopyright => Some("copyright".into()), Token::KwDebug => Some("debug".into()), Token::KwExpOpts => Some("expropts".into()), Token::KwDatFmt => Some("datfmt".into()), Token::KwTimFmt => Some("timfmt".into()), Token::KwDecEdit => Some("decedit".into()), Token::KwAlwNull => Some("alwnull".into()), Token::KwStgMdl => Some("stgmdl".into()), Token::KwTruncNbr => Some("truncnbr".into()), Token::KwInz => Some("inz".into()), Token::KwConst => Some("const".into()), Token::KwValue => Some("value".into()), Token::KwStatic => Some("static".into()), Token::KwBased => Some("based".into()), Token::KwDim => Some("dim".into()), Token::KwAscend => Some("ascend".into()), Token::KwDescend => Some("descend".into()), Token::KwAltSeq => Some("altseq".into()), Token::KwOpDesc => Some("opdesc".into()), Token::KwNoOpt => Some("noopt".into()), Token::KwVolatile => Some("volatile".into()), Token::KwOptions => Some("options".into()), Token::KwQualified => Some("qualified".into()), Token::KwTemplate => Some("template".into()), Token::KwExtName => Some("extname".into()), Token::KwExtProc => Some("extproc".into()), Token::KwLike => Some("like".into()), Token::KwLikeDs => Some("likeds".into()), Token::KwLikeRec => 
Some("likerec".into()),
        Token::KwVarChar => Some("varchar".into()),
        Token::KwGraph => Some("graph".into()),
        Token::KwVarGraph => Some("vargraph".into()),
        Token::KwUcs2 => Some("ucs2".into()),
        Token::KwVarUcs2 => Some("varucs2".into()),
        Token::KwPacked => Some("packed".into()),
        Token::KwZoned => Some("zoned".into()),
        Token::KwBindec => Some("bindec".into()),
        Token::KwUns => Some("uns".into()),
        Token::KwObject => Some("object".into()),
        Token::KwProcPtr => Some("procptr".into()),
        Token::KwLikeRec2 => Some("likerec".into()),
        _ => None,
    }
}

/// Return `true` when `token` is one of the built-in-function (`%XXX`) tokens.
///
/// Used by the expression parser to recognise a BIF call head before
/// consuming the argument list.
fn is_bif_token(token: &Token) -> bool {
    matches!(
        token,
        Token::BifAbs | Token::BifAddr | Token::BifAlloc | Token::BifBitAnd
            | Token::BifBitNot | Token::BifBitOr | Token::BifBitXor | Token::BifChar
            | Token::BifCheck | Token::BifCheckR | Token::BifDate | Token::BifDays
            | Token::BifDec | Token::BifDecH | Token::BifDecPos | Token::BifDiff
            | Token::BifDiv | Token::BifEditC | Token::BifEditFlt | Token::BifEditW
            | Token::BifElem | Token::BifEof | Token::BifEqual | Token::BifError
            | Token::BifFields | Token::BifFloat | Token::BifFound | Token::BifGraph
            | Token::BifHours | Token::BifInt | Token::BifIntH | Token::BifKds
            | Token::BifLen | Token::BifMinutes | Token::BifMonths | Token::BifMSeconds
            | Token::BifNullInd | Token::BifOccur | Token::BifOpen | Token::BifPAddr
            | Token::BifParms | Token::BifReAlloc | Token::BifRem | Token::BifReplace
            | Token::BifScan | Token::BifScanR | Token::BifSeconds | Token::BifShtDn
            | Token::BifSize | Token::BifSqrt | Token::BifStatus | Token::BifStr
            | Token::BifSubArr | Token::BifSubst | Token::BifThis | Token::BifTime
            | Token::BifTimestamp | Token::BifTrim | Token::BifTrimL | Token::BifTrimR
            | Token::BifUcs2 | Token::BifUns | Token::BifUnsH | Token::BifXFoot
            | Token::BifXLate | Token::BifYears
    )
}

/// Decode a hex-literal digit string (the payload of `X'…'`) into raw bytes.
///
/// Best-effort by design: each two-character chunk is parsed independently and
/// chunks that are not valid UTF-8 or not valid hex are silently skipped, so a
/// malformed literal yields the decodable prefix/bytes rather than an error.
/// NOTE(review): a trailing odd nibble is parsed as a single hex digit
/// (`"F"` -> 0x0F), matching the original behaviour — confirm that is intended.
fn hex_to_bytes(h: &str) -> Vec<u8> {
    let mut bytes = Vec::with_capacity(h.len() / 2);
    for chunk in h.as_bytes().chunks(2) {
        if let Ok(digits) = std::str::from_utf8(chunk) {
            if let Ok(b) = u8::from_str_radix(digits, 16) {
                bytes.push(b);
            }
        }
    }
    bytes
}

// ─────────────────────────────────────────────────────────────────────────────
// ─────────────────────────────────────────────────────────────────────────────
// Unit tests
// ─────────────────────────────────────────────────────────────────────────────
#[cfg(test)]
mod tests {
    use super::*;

    /// Lower `src`, failing the test with a readable message on error.
    fn lower_ok(src: &str) -> Program {
        lower(src).expect("lower should succeed")
    }

    #[test]
    fn tokenize_ctl_opt() {
        let tokens = tokenize("CTL-OPT DFTACTGRP(*NO);").unwrap();
        assert!(tokens.contains(&Token::KwCtlOpt));
        assert!(tokens.contains(&Token::KwDftActGrp));
        assert!(tokens.contains(&Token::KwNo));
    }

    #[test]
    fn tokenize_string_literal() {
        let tokens = tokenize("'Hello, World!'").unwrap();
        assert!(tokens.contains(&Token::StringLit("Hello, World!".into())));
    }

    #[test]
    fn tokenize_embedded_quote() {
        // RPG escapes a quote inside a string literal by doubling it.
        let tokens = tokenize("'it''s'").unwrap();
        assert!(tokens.contains(&Token::StringLit("it's".into())));
    }

    #[test]
    fn tokenize_star_constants() {
        let tokens = tokenize("*ON *OFF *BLANK *BLANKS *NULL").unwrap();
        assert!(tokens.contains(&Token::KwOn));
        assert!(tokens.contains(&Token::KwOff));
        assert!(tokens.contains(&Token::KwBlank));
        assert!(tokens.contains(&Token::KwBlanks));
        assert!(tokens.contains(&Token::KwNull));
    }

    #[test]
    fn tokenize_bif() {
        let tokens = tokenize("%TRIM(name)").unwrap();
        assert!(tokens.contains(&Token::BifTrim));
    }

    #[test]
    fn lower_ctl_opt() {
        let p = lower_ok("CTL-OPT DFTACTGRP(*NO);");
        assert_eq!(p.declarations.len(), 1);
        if let Declaration::ControlSpec(cs) = &p.declarations[0] {
            assert!(cs.keywords.iter().any(|k| matches!(k, CtlKeyword::DftActGrp(false))));
        } else {
            panic!("expected ControlSpec");
        }
    }

    #[test]
    fn lower_ctl_opt_nomain() {
        let p = lower_ok("CTL-OPT NOMAIN;");
        if let Declaration::ControlSpec(cs) = &p.declarations[0] {
            assert!(cs.keywords.iter().any(|k| matches!(k, CtlKeyword::NoMain)));
        } else {
            // Fix: the test previously passed silently when the declaration
            // was not a ControlSpec at all.
            panic!("expected ControlSpec");
        }
    }

    #[test]
    fn lower_dcl_s_char_inz() {
        let p = lower_ok("DCL-S greeting CHAR(25) INZ('Hello, World!');");
        assert_eq!(p.declarations.len(), 1);
        if let Declaration::Standalone(decl) = &p.declarations[0] {
            assert_eq!(decl.name, "greeting");
            assert!(matches!(decl.ty, TypeSpec::Char(_)));
            assert!(decl.keywords.iter().any(|k| matches!(k, VarKeyword::InzExpr(_))));
        } else {
            panic!("expected Standalone");
        }
    }

    #[test]
    fn lower_dcl_s_int() {
        let p = lower_ok("DCL-S counter INT(10) INZ(0);");
        if let Declaration::Standalone(decl) = &p.declarations[0] {
            assert_eq!(decl.name, "counter");
            assert!(matches!(decl.ty, TypeSpec::Int(_)));
        } else {
            // Fix: previously a non-Standalone declaration passed silently.
            panic!("expected Standalone");
        }
    }

    #[test]
    fn lower_dcl_c() {
        let p = lower_ok("DCL-C MAX_SIZE CONST(100);");
        if let Declaration::Constant(c) = &p.declarations[0] {
            assert_eq!(c.name, "MAX_SIZE");
        } else {
            // Fix: previously a non-Constant declaration passed silently.
            panic!("expected Constant");
        }
    }

    #[test]
    fn lower_dcl_proc_export() {
        let src = "DCL-PROC main EXPORT;\n  RETURN;\nEND-PROC;";
        let p = lower_ok(src);
        assert_eq!(p.procedures.len(), 1);
        let proc = &p.procedures[0];
        assert_eq!(proc.name, "main");
        assert!(proc.exported);
        assert_eq!(proc.body.len(), 1);
        assert!(matches!(proc.body[0], Statement::Return(_)));
    }

    #[test]
    fn lower_dsply() {
        let src = "DCL-PROC main EXPORT;\n  DSPLY greeting;\n  RETURN;\nEND-PROC;";
        let p = lower_ok(src);
        let proc = &p.procedures[0];
        assert!(matches!(proc.body[0], Statement::Dsply(_)));
    }

    #[test]
    fn lower_hello_rpg() {
        // End-to-end smoke test over the bundled sample program.
        let hello = include_str!("../hello.rpg");
        let p = lower_ok(hello);
        assert!(!p.procedures.is_empty(), "should have at least one procedure");
        let proc = p
            .procedures
            .iter()
            .find(|pr| pr.name == "main")
            .expect("main proc");
        assert!(proc.exported);
        // Should have DSPLY and RETURN
        assert!(proc.body.iter().any(|s| matches!(s, Statement::Dsply(_))));
        assert!(proc.body.iter().any(|s| matches!(s, Statement::Return(_))));
    }

    #[test]
    fn lower_if_stmt() {
        let src = "DCL-PROC p EXPORT;\n  IF x=1;\n    RETURN;\n  ENDIF;\nEND-PROC;";
        let p = lower_ok(src);
        let proc = &p.procedures[0];
        assert!(matches!(proc.body[0], Statement::If(_)));
    }

    #[test]
    fn lower_dow_stmt() {
        let src = "DCL-PROC p EXPORT;\n  DOW x>0;\n    LEAVE;\n  ENDDO;\nEND-PROC;";
        let p = lower_ok(src);
        let proc = &p.procedures[0];
        assert!(matches!(proc.body[0], Statement::DoWhile(_)));
    }

    #[test]
    fn lower_assign_stmt() {
        let src = "DCL-PROC p EXPORT;\n  x=1;\n  RETURN;\nEND-PROC;";
        let p = lower_ok(src);
        assert!(matches!(p.procedures[0].body[0], Statement::Assign(_)));
    }
}