Improvements

This commit is contained in:
2026-05-26 21:07:58 -07:00
parent 58bab75bb5
commit d0fcdfab01
21 changed files with 121 additions and 33 deletions
+65 -4
View File
@@ -1,5 +1,8 @@
import asyncio
import logging
import os
from pathlib import Path
from typing import List, Optional
from src.llm.models import ExtractionResult
from src.llm.processor import LLMProcessor
@@ -7,7 +10,14 @@ from src.stt.listener import AudioListener
from src.stt.transcriber import Transcriber
from src.ui.tui import ConfirmationApp
logging.basicConfig(level=logging.INFO)
# Configure logging to write to a file rather than the console (the default handler writes to stderr)
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
handlers=[
logging.FileHandler("pipeline.log"),
],
)
logger = logging.getLogger(__name__)
@@ -17,7 +27,7 @@ class PipelineOrchestrator:
# Modules
self.listener = AudioListener(loop=self.loop)
self.transcriber = Transcriber()
self.transcriber = Transcriber(model_size="small")
self.processor = LLMProcessor()
# Queues
@@ -26,6 +36,10 @@ class PipelineOrchestrator:
self.is_running = False
# Conversation history for context
self.history = [] # List of strings (transcripts)
self.history_max_words = 1000
async def stt_worker(self):
"""
Worker that handles STT: Audio -> Text.
@@ -61,9 +75,29 @@ class PipelineOrchestrator:
logger.info(f"LLM Worker: Processing text: {raw_text}")
# 1. Prepare Context (Conversation History)
# Maintain history and truncate to max words
self.history.append(raw_text)
full_history_text = " ".join(self.history)
words = full_history_text.split()
if len(words) > self.history_max_words:
# Keep the last N words
kept_words = words[-self.history_max_words :]
context_text = " ".join(kept_words)
else:
context_text = full_history_text
# 2. Prepare Context (Wiki / Database of Knowledge)
wiki_context = self._get_wiki_context()
# Combine both
combined_context = f"Conversation History:\n{context_text}\n\nWiki Knowledge:\n{wiki_context}"
# Process via LLM (Filter -> Extract)
# Note: this is currently a synchronous call, which blocks the loop.
result = self.processor.process_pipeline(raw_text)
# Run in a separate thread to avoid blocking the event loop
result = await asyncio.to_thread(
self.processor.process_pipeline, raw_text, context=combined_context
)
if (
result.lore_updates
@@ -83,6 +117,30 @@ class PipelineOrchestrator:
# Small sleep
await asyncio.sleep(0.1)
def _get_wiki_context(self) -> str:
    """
    Build the wiki portion of the LLM context from the lore directory.

    Every ``*.md`` file under ``DATA_LORE_DIR`` (searched recursively) is
    rendered as ``File: <relative path>``, a ``Content:`` line, and the file
    text. Entries are separated by a blank line and emitted in sorted path
    order so the same lore tree always yields the same string.

    Returns:
        The combined wiki text, or "No wiki knowledge available." when no
        file could be read.
    """
    # Function-scope import kept as-is.
    # NOTE(review): presumably this avoids an import cycle - confirm.
    from src.persistence.lore import DATA_LORE_DIR

    sections = []
    # rglob yields entries in filesystem order, which is not stable across
    # runs or machines; sorting keeps the prompt context deterministic.
    for md_path in sorted(DATA_LORE_DIR.rglob("*.md")):
        try:
            body = md_path.read_text(encoding="utf-8")
        except (OSError, UnicodeError) as exc:
            # Best effort: an unreadable or badly encoded file (or a
            # directory whose name ends in .md) must not hide the others.
            logger.error("Error reading wiki file %s: %s", md_path, exc)
            continue
        sections.append(
            f"File: {md_path.relative_to(DATA_LORE_DIR)}\nContent:\n{body}"
        )

    if not sections:
        return "No wiki knowledge available."
    return "\n\n".join(sections)
async def tui_worker(self):
"""
Worker that handles TUI: Proposal -> Persistence.
@@ -93,8 +151,11 @@ class PipelineOrchestrator:
# Pass the proposal queue to the app.
app = ConfirmationApp(proposal_queue=self.proposal_queue)
await app.run_async()
# Once the TUI exits, stop the entire pipeline
self.stop()
except Exception as e:
logger.error(f"TUI Worker error: {e}")
self.stop()
async def run(self):
"""