Improvements

This commit is contained in:
2026-05-26 21:07:58 -07:00
parent 58bab75bb5
commit d0fcdfab01
21 changed files with 121 additions and 33 deletions
+1 -1
View File
@@ -1,2 +1,2 @@
artifacts/ artifacts/
__pycache__ **/__pycache__/
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
+50 -22
View File
@@ -1,3 +1,4 @@
import logging
import os import os
from typing import Any, Dict, Optional from typing import Any, Dict, Optional
@@ -7,6 +8,8 @@ from pydantic import ValidationError
from .models import ExtractionResult from .models import ExtractionResult
from .prompts import EXTRACTION_SYSTEM_PROMPT, NOISE_FILTER_SYSTEM_PROMPT from .prompts import EXTRACTION_SYSTEM_PROMPT, NOISE_FILTER_SYSTEM_PROMPT
logger = logging.getLogger(__name__)
class LLMProcessor: class LLMProcessor:
def __init__( def __init__(
@@ -47,7 +50,7 @@ class LLMProcessor:
# but we can ensure the client is initialized. # but we can ensure the client is initialized.
pass pass
except Exception as e: except Exception as e:
print(f"Error initializing LLM client for backend {backend}: {e}") logger.error(f"Error initializing LLM client for backend {backend}: {e}")
raise raise
self.model = model or os.environ.get("LLM_MODEL", "gpt-4o") self.model = model or os.environ.get("LLM_MODEL", "gpt-4o")
@@ -56,73 +59,98 @@ class LLMProcessor:
self, self,
system_prompt: str, system_prompt: str,
user_prompt: str, user_prompt: str,
context: Optional[str] = None,
response_format: Optional[Any] = None, response_format: Optional[Any] = None,
) -> str: ) -> str:
""" """
Generic method to call the LLM. Generic method to call the LLM.
""" """
messages = [
{"role": "system", "content": system_prompt},
]
if context:
messages.append(
{
"role": "system",
"content": f"Context from previous conversation:\n{context}",
}
)
messages.append({"role": "user", "content": user_prompt})
try: try:
response = self.client.chat.completions.create( response = self.client.chat.completions.create(
model=self.model, model=self.model,
messages=[ messages=messages,
{"role": "system", "content": system_prompt},
{"role": "user", "content": user_prompt},
],
response_format=response_format, response_format=response_format,
extra_body={"include_reasoning": False}, extra_body={"include_reasoning": False},
) )
return response.choices[0].message.content return response.choices[0].message.content
except Exception as e: except Exception as e:
print(f"LLM Error: {e}") logger.error(f"LLM Error: {e}")
return "" return ""
def filter_transcript(self, text: str) -> str: def filter_transcript(self, text: str, context: Optional[str] = None) -> str:
""" """
Stage 1: Raw Transcript -> Filtered Text. Stage 1: Raw Transcript -> Filtered Text.
""" """
result = self._call_llm(NOISE_FILTER_SYSTEM_PROMPT, text) result = self._call_llm(NOISE_FILTER_SYSTEM_PROMPT, text, context=context)
print(f"LLM Processor (Filter): {text} -> {result}") logger.info(f"LLM Processor (Filter): {text} -> {result}")
return result return result
def extract_structured_data(self, filtered_text: str) -> ExtractionResult: def extract_structured_data(
self, filtered_text: str, context: Optional[str] = None
) -> ExtractionResult:
""" """
Stage 2: Filtered Text -> Structured Data. Stage 2: Filtered Text -> Structured Data.
""" """
print(f"LLM Processor (Extract): Calling extraction for: {filtered_text}") logger.info(f"LLM Processor (Extract): Calling extraction for: {filtered_text}")
try: try:
# Using standard chat.completions.create with JSON mode for better compatibility with vLLM # Using standard chat.completions.create with JSON mode for better compatibility with vLLM
print("LLM Processor (Extract): Sending request to backend...") logger.info("LLM Processor (Extract): Sending request to backend...")
response = self.client.chat.completions.create(
model=self.model,
messages = [ messages = [
{"role": "system", "content": EXTRACTION_SYSTEM_PROMPT}, {"role": "system", "content": EXTRACTION_SYSTEM_PROMPT},
{"role": "user", "content": filtered_text}, ]
], if context:
messages.append(
{
"role": "system",
"content": f"Context from previous conversation:\n{context}",
}
)
messages.append({"role": "user", "content": filtered_text})
response = self.client.chat.completions.create(
model=self.model,
messages=messages,
response_format={"type": "json_object"}, response_format={"type": "json_object"},
extra_body={"include_reasoning": False}, extra_body={"include_reasoning": False},
) )
print("LLM Processor (Extract): Response received from backend.") logger.info("LLM Processor (Extract): Response received from backend.")
import json import json
content = response.choices[0].message.content content = response.choices[0].message.content
print(f"LLM Processor (Extract): Raw JSON response: {content}") logger.info(f"LLM Processor (Extract): Raw JSON response: {content}")
data = json.loads(content) data = json.loads(content)
# Map the JSON data to the Pydantic model # Map the JSON data to the Pydantic model
return ExtractionResult(**data) return ExtractionResult(**data)
except Exception as e: except Exception as e:
print(f"Extraction Error: {e}") logger.error(f"Extraction Error: {e}")
# Return an empty ExtractionResult if parsing fails # Return an empty ExtractionResult if parsing fails
return ExtractionResult() return ExtractionResult()
def process_pipeline(self, raw_text: str) -> ExtractionResult: def process_pipeline(
self, raw_text: str, context: Optional[str] = None
) -> ExtractionResult:
""" """
Executes the two-stage pipeline: Raw Transcript -> Filtered Text -> Structured Data. Executes the two-stage pipeline: Raw Transcript -> Filtered Text -> Structured Data.
""" """
filtered_text = self.filter_transcript(raw_text) filtered_text = self.filter_transcript(raw_text, context=context)
if not filtered_text: if not filtered_text:
return ExtractionResult() return ExtractionResult()
return self.extract_structured_data(filtered_text) return self.extract_structured_data(filtered_text, context=context)
Binary file not shown.
Binary file not shown.
+65 -4
View File
@@ -1,5 +1,8 @@
import asyncio import asyncio
import logging import logging
import os
from pathlib import Path
from typing import List, Optional
from src.llm.models import ExtractionResult from src.llm.models import ExtractionResult
from src.llm.processor import LLMProcessor from src.llm.processor import LLMProcessor
@@ -7,7 +10,14 @@ from src.stt.listener import AudioListener
from src.stt.transcriber import Transcriber from src.stt.transcriber import Transcriber
from src.ui.tui import ConfirmationApp from src.ui.tui import ConfirmationApp
logging.basicConfig(level=logging.INFO) # Configure logging to write to a file instead of stdout
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
handlers=[
logging.FileHandler("pipeline.log"),
],
)
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@@ -17,7 +27,7 @@ class PipelineOrchestrator:
# Modules # Modules
self.listener = AudioListener(loop=self.loop) self.listener = AudioListener(loop=self.loop)
self.transcriber = Transcriber() self.transcriber = Transcriber(model_size="small")
self.processor = LLMProcessor() self.processor = LLMProcessor()
# Queues # Queues
@@ -26,6 +36,10 @@ class PipelineOrchestrator:
self.is_running = False self.is_running = False
# Conversation history for context
self.history = [] # List of strings (transcripts)
self.history_max_words = 1000
async def stt_worker(self): async def stt_worker(self):
""" """
Worker that handles STT: Audio -> Text. Worker that handles STT: Audio -> Text.
@@ -61,9 +75,29 @@ class PipelineOrchestrator:
logger.info(f"LLM Worker: Processing text: {raw_text}") logger.info(f"LLM Worker: Processing text: {raw_text}")
# 1. Prepare Context (Conversation History)
# Maintain history and truncate to max words
self.history.append(raw_text)
full_history_text = " ".join(self.history)
words = full_history_text.split()
if len(words) > self.history_max_words:
# Keep the last N words
kept_words = words[-self.history_max_words :]
context_text = " ".join(kept_words)
else:
context_text = full_history_text
# 2. Prepare Context (Wiki / Database of Knowledge)
wiki_context = self._get_wiki_context()
# Combine both
combined_context = f"Conversation History:\n{context_text}\n\nWiki Knowledge:\n{wiki_context}"
# Process via LLM (Filter -> Extract) # Process via LLM (Filter -> Extract)
# Note: this is currently a synchronous call, which blocks the loop. # Run in a separate thread to avoid blocking the event loop
result = self.processor.process_pipeline(raw_text) result = await asyncio.to_thread(
self.processor.process_pipeline, raw_text, context=combined_context
)
if ( if (
result.lore_updates result.lore_updates
@@ -83,6 +117,30 @@ class PipelineOrchestrator:
# Small sleep # Small sleep
await asyncio.sleep(0.1) await asyncio.sleep(0.1)
def _get_wiki_context(self) -> str:
"""
Reads all files in the lore directory and returns them as a single context string.
"""
from src.persistence.lore import DATA_LORE_DIR
wiki_contents = []
# Recursively find all .md files in the lore directory
for path in DATA_LORE_DIR.rglob("*.md"):
try:
with open(path, "r", encoding="utf-8") as f:
content = f.read()
wiki_contents.append(
f"File: {path.relative_to(DATA_LORE_DIR)}\nContent:\n{content}"
)
except Exception as e:
logger.error(f"Error reading wiki file {path}: {e}")
return (
"\n\n".join(wiki_contents)
if wiki_contents
else "No wiki knowledge available."
)
async def tui_worker(self): async def tui_worker(self):
""" """
Worker that handles TUI: Proposal -> Persistence. Worker that handles TUI: Proposal -> Persistence.
@@ -93,8 +151,11 @@ class PipelineOrchestrator:
# Pass the proposal queue to the app. # Pass the proposal queue to the app.
app = ConfirmationApp(proposal_queue=self.proposal_queue) app = ConfirmationApp(proposal_queue=self.proposal_queue)
await app.run_async() await app.run_async()
# Once the TUI exits, stop the entire pipeline
self.stop()
except Exception as e: except Exception as e:
logger.error(f"TUI Worker error: {e}") logger.error(f"TUI Worker error: {e}")
self.stop()
async def run(self): async def run(self):
""" """
Binary file not shown.
Binary file not shown.
Binary file not shown.
+1 -1
View File
@@ -5,7 +5,7 @@ import numpy as np
import sounddevice as sd import sounddevice as sd
import torch import torch
logging.basicConfig(level=logging.INFO) # Do not call basicConfig here, as it's called in the orchestrator
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
+1 -1
View File
@@ -2,7 +2,7 @@ import logging
from faster_whisper import WhisperModel from faster_whisper import WhisperModel
logging.basicConfig(level=logging.INFO) # Do not call basicConfig here, as it's called in the orchestrator
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
Binary file not shown.
Binary file not shown.
Binary file not shown.
+3 -4
View File
@@ -140,13 +140,12 @@ class ConfirmationApp(App):
try: try:
result = await self.proposal_queue.get() result = await self.proposal_queue.get()
self.add_result(result) self.add_result(result)
except Exception as e:
# Log error but keep the worker running
self.log(f"Error polling proposal queue: {e}")
finally:
# Signal that the item has been processed # Signal that the item has been processed
if hasattr(self.proposal_queue, "task_done"): if hasattr(self.proposal_queue, "task_done"):
self.proposal_queue.task_done() self.proposal_queue.task_done()
except Exception as e:
# Log error but keep the worker running
self.log(f"Error polling proposal queue: {e}")
def add_result(self, result: ExtractionResult) -> None: def add_result(self, result: ExtractionResult) -> None:
""" """