From afa8d17f10fd19de7ae08da701cdbbd8a3acb283 Mon Sep 17 00:00:00 2001
From: charles
Date: Thu, 28 May 2026 00:08:52 -0700
Subject: [PATCH] Mostly working

---
Review notes (text between the "---" line and the diff is not part of the
commit and is ignored by git-am):

- Formatting: the line breaks and leading whitespace of this patch were
  reconstructed. Blank-line placement was checked against the hunk header
  counts. NOTE(review): the nesting depth of the orchestrator.py hunk is an
  assumption (loop at 16 columns) - confirm against the target file, or apply
  with `git apply --ignore-whitespace`.
- main.py: RAGManager() is now constructed right after parse_args(), before
  any flag is inspected, so it is also built on the no-argument
  "Hello from dnd-helpers!" path.
- orchestrator.py: the value returned by update_lore is passed straight to
  rag_manager.ingest_file. NOTE(review): presumably update_lore returns the
  path of the file it wrote - verify, it is not shown in this patch.
- manager.py: ingest_directory relies on os.walk, which ignores scandir errors
  by default, so a missing or unreadable dir_path yields no files and still
  prints the "Successfully ingested 0 files" message. NOTE(review): assumes
  `os` is already imported in manager.py; this patch does not add the import.

 main.py                      | 26 ++++++++++++++++++++++++--
 src/pipeline/orchestrator.py |  7 +++++--
 src/rag/manager.py           | 16 ++++++++++++++++
 3 files changed, 45 insertions(+), 4 deletions(-)

diff --git a/main.py b/main.py
index ab8cd9d..071291d 100644
--- a/main.py
+++ b/main.py
@@ -10,16 +10,38 @@ def main():
         type=str,
         help="Path to a PDF file to ingest into the RAG system",
     )
+    parser.add_argument(
+        "--ingest-file",
+        type=str,
+        help="Path to a markdown file to ingest into the RAG system",
+    )
+    parser.add_argument(
+        "--ingest-dir",
+        type=str,
+        help="Path to a directory of markdown files to ingest into the RAG system",
+    )
 
     args = parser.parse_args()
 
+    rag_manager = RAGManager()
+
     if args.ingest_pdf:
         print(f"Ingesting PDF: {args.ingest_pdf}...")
-        rag_manager = RAGManager()
         rag_manager.ingest_pdf(args.ingest_pdf)
         print("PDF ingestion complete.")
 
-    print("Hello from dnd-helpers!")
+    if args.ingest_file:
+        print(f"Ingesting File: {args.ingest_file}...")
+        rag_manager.ingest_file(args.ingest_file)
+        print("File ingestion complete.")
+
+    if args.ingest_dir:
+        print(f"Ingesting Directory: {args.ingest_dir}...")
+        rag_manager.ingest_directory(args.ingest_dir)
+        print("Directory ingestion complete.")
+
+    if not any([args.ingest_pdf, args.ingest_file, args.ingest_dir]):
+        print("Hello from dnd-helpers!")
 
 
 if __name__ == "__main__":
diff --git a/src/pipeline/orchestrator.py b/src/pipeline/orchestrator.py
index 477bbe8..d15b7b2 100644
--- a/src/pipeline/orchestrator.py
+++ b/src/pipeline/orchestrator.py
@@ -215,8 +215,11 @@ class PipelineOrchestrator:
 
                 # Persistence: Lore Updates
                 for lore_update in extraction_result.lore_updates:
-                    await asyncio.to_thread(update_lore, lore_update)
-                    logger.info(f"LLM Worker: Lore updated: {lore_update.entity_name}")
+                    file_path = await asyncio.to_thread(update_lore, lore_update)
+                    await asyncio.to_thread(self.rag_manager.ingest_file, file_path)
+                    logger.info(
+                        f"LLM Worker: Lore updated and ingested into RAG: {lore_update.entity_name}"
+                    )
 
                 # Persistence: Character State Updates
                 for char_update in extraction_result.character_updates:
diff --git a/src/rag/manager.py b/src/rag/manager.py
index 5cdd2bc..63eb641 100644
--- a/src/rag/manager.py
+++ b/src/rag/manager.py
@@ -87,6 +87,22 @@ class RAGManager:
 
         print(f"Successfully ingested {file_path} into the vector store.")
 
+    def ingest_directory(self, dir_path: str):
+        """
+        Recursively loads all markdown files in a directory into the index.
+        """
+        files_processed = 0
+        for root, _, files in os.walk(dir_path):
+            for file in files:
+                if file.endswith(".md"):
+                    file_path = os.path.join(root, file)
+                    self.ingest_file(file_path)
+                    files_processed += 1
+
+        print(
+            f"Successfully ingested {files_processed} files from {dir_path} into the vector store."
+        )
+
     def summarize_results(self, query: str, nodes: List[Any]) -> List[ContextUpdate]:
         """
         Uses an LLM to transform raw snippets into concise "insights", filtering out irrelevant content.