Compare commits

..

3 Commits

Author SHA1 Message Date
charles 15dfbfb467 Add LLM backend support and improve debugging observability
- Add LLM_BACKEND to environment configuration
- Implement detailed debug logging for LLM request/response cycles
- Add missing llama-index dependencies for embeddings and chroma
- Update prompt constraints to prevent lore redundancy
- Enable CUDA for transcription and set logging to DEBUG level
- Add entry point for running the orchestrator directly
- Cleanup unused comment in TUI context updates
2026-05-28 23:06:25 -07:00
charles 49127d695a Small changes 2026-05-28 22:08:00 -07:00
charles 2363cde160 Refactor LLM processor and improve async handling
Move contextual information handling from noise filtering to extraction
and centralize LLM call logic. Wrap blocking transcription and state
update calls in asyncio.to_thread to prevent event loop blocking.
Update transcriber model size to base.
2026-05-28 18:54:09 -07:00
7 changed files with 96 additions and 86 deletions
+2 -1
View File
@@ -1,7 +1,8 @@
# D&D Helpers Configuration # D&D Helpers Configuration
OPENAI_API_KEY=no-key-required OPENAI_API_KEY=no-key-required
OPENAI_BASE_URL=https://vllm.tipsy.codes/v1 OPENAI_BASE_URL=https://vllm.tipsy.codes/v1
LLM_MODEL=Intel/gemma-4-31B-it-int4-AutoRound LLM_MODEL=google/gemma-4-26b-a4b-it
LLM_BACKEND=vllm
#LLM_BACKEND=ollama #LLM_BACKEND=ollama
#LLM_MODEL=gemma:2b #LLM_MODEL=gemma:2b
WHISPER_MODEL=base WHISPER_MODEL=base
+2
View File
@@ -9,3 +9,5 @@ python-dotenv
llama-index llama-index
chromadb chromadb
pdfplumber pdfplumber
llama-index-embeddings-huggingface
llama-index-vector_stores-chroma
-4
View File
@@ -55,10 +55,6 @@ class ContextUpdate(BaseModel):
class FilterResult(BaseModel): class FilterResult(BaseModel):
contextual_info: str = Field(
...,
description="Information interesting to the user but not useful for structured extraction",
)
filtered_text: str = Field( filtered_text: str = Field(
..., description="Cleaned transcript used for structured data extraction" ..., description="Cleaned transcript used for structured data extraction"
) )
+30 -27
View File
@@ -45,6 +45,7 @@ class LLMProcessor:
final_base_url = base_url or os.environ.get("OPENAI_BASE_URL") final_base_url = base_url or os.environ.get("OPENAI_BASE_URL")
final_api_key = api_key or os.environ.get("OPENAI_API_KEY") final_api_key = api_key or os.environ.get("OPENAI_API_KEY")
logger.info(f"Using LLM backend: {backend}")
try: try:
self.client = OpenAI( self.client = OpenAI(
api_key=final_api_key, api_key=final_api_key,
@@ -61,6 +62,18 @@ class LLMProcessor:
self.model = model or os.environ.get("LLM_MODEL", "gpt-4o") self.model = model or os.environ.get("LLM_MODEL", "gpt-4o")
def _strip_markdown_code_blocks(self, content: str) -> str:
"""
Strips markdown code blocks (e.g., ```json ... ```) from the content.
"""
import re
# Remove opening and closing code blocks
content = re.sub(
r"^```(?:json)?\n?|```$", "", content, flags=re.MULTILINE
).strip()
return content
def _call_llm( def _call_llm(
self, self,
system_prompt: str, system_prompt: str,
@@ -84,6 +97,14 @@ class LLMProcessor:
messages.append({"role": "user", "content": user_prompt}) messages.append({"role": "user", "content": user_prompt})
# Debugging: Dump inputs
logger.debug("--- LLM CALL START ---")
logger.debug(f"Model: {self.model}")
logger.debug(f"Messages: {messages}")
if response_format:
logger.debug(f"Response Format: {response_format}")
logger.debug("--- LLM CALL END ---")
try: try:
response = self.client.chat.completions.create( response = self.client.chat.completions.create(
model=self.model, model=self.model,
@@ -93,15 +114,12 @@ class LLMProcessor:
) )
content = response.choices[0].message.content content = response.choices[0].message.content
# Strip markdown code blocks if present # Debugging: Dump outputs
if content.startswith("```"): logger.debug("--- LLM RESPONSE START ---")
import re logger.debug(f"Content: {content}")
logger.debug("--- LLM RESPONSE END ---")
content = re.sub( return self._strip_markdown_code_blocks(content)
r"^```(?:json)?\n?|```$", "", content, flags=re.MULTILINE
).strip()
return content
except Exception as e: except Exception as e:
logger.error(f"LLM Error: {e}") logger.error(f"LLM Error: {e}")
return "" return ""
@@ -147,34 +165,19 @@ class LLMProcessor:
""" """
logger.info(f"LLM Processor (Extract): Calling extraction for: {filtered_text}") logger.info(f"LLM Processor (Extract): Calling extraction for: {filtered_text}")
try: try:
# Using standard chat.completions.create with JSON mode for better compatibility with vLLM
logger.info("LLM Processor (Extract): Sending request to backend...")
system_prompt = EXTRACTION_SYSTEM_PROMPT system_prompt = EXTRACTION_SYSTEM_PROMPT
if context: if context:
system_prompt += f"\n{context}" system_prompt += f"\n{context}"
messages = [ result = self._call_llm(
{"role": "system", "content": system_prompt}, system_prompt=system_prompt,
] user_prompt=filtered_text,
messages.append({"role": "user", "content": filtered_text})
for message in messages:
logger.info(f"LLM Processor (Extract): Message: {message}")
response = self.client.chat.completions.create(
model=self.model,
messages=messages,
response_format={"type": "json_object"}, response_format={"type": "json_object"},
extra_body={"enable_thinking": False},
) )
logger.info("LLM Processor (Extract): Response received from backend.")
import json import json
content = response.choices[0].message.content data = json.loads(result)
logger.info(f"LLM Processor (Extract): Raw JSON response: {content}")
data = json.loads(content)
# Map the JSON data to the Pydantic model # Map the JSON data to the Pydantic model
return ExtractionResult(**data) return ExtractionResult(**data)
+3 -5
View File
@@ -12,7 +12,6 @@ NOISE_FILTER_SYSTEM_PROMPT = """
You are a D&D Game Master's assistant. Given a transcript, remove all out-of-character (OOC) chatter, logistical discussions (e.g., 'Where is my d20?'), and non-relevant noise. You are a D&D Game Master's assistant. Given a transcript, remove all out-of-character (OOC) chatter, logistical discussions (e.g., 'Where is my d20?'), and non-relevant noise.
You must output your response as a JSON object with the following keys: You must output your response as a JSON object with the following keys:
- "contextual_info": Information that is interesting or relevant to the story/session but doesn't fit into lore, character state, or significant events (e.g., flavor text, atmospheric descriptions, player commentary that adds context).
- "filtered_text": The cleaned transcript. IMPORTANT: Keep all player questions, requests for rule clarifications, and mentions of spells, NPCs, or locations in this field, as they are used to trigger knowledge base lookups. - "filtered_text": The cleaned transcript. IMPORTANT: Keep all player questions, requests for rule clarifications, and mentions of spells, NPCs, or locations in this field, as they are used to trigger knowledge base lookups.
Keep the original speakers' names if they are present in the transcript. Keep the original speakers' names if they are present in the transcript.
@@ -22,15 +21,14 @@ Do not add any commentary or summaries. Just filter the text.
EXTRACTION_SYSTEM_PROMPT = """ EXTRACTION_SYSTEM_PROMPT = """
You are a D&D session analyzer. Your goal is to extract structured data from a filtered transcript. You are a D&D session analyzer. Your goal is to extract structured data from a filtered transcript.
Extract any changes to character states (HP, status effects, inventory) and any new lore facts (NPCs, locations, world-building). Extract any changes to character states (HP, status effects, inventory) and any new lore facts (NPCs, locations, world-building).
In addition extracting updates to character state and lore, look for the oppertunity to provide useful context,
DO NOT THINK. such as the answer to a player's question or the resolution of a lore fact.
CONSTRAINTS: CONSTRAINTS:
- OUTPUT ONLY VALID JSON. - OUTPUT ONLY VALID JSON.
- DO NOT include any commentary, explanations, or "thought" blocks.
- DO NOT include any keys other than "lore", "character_state", and "events".
- If no relevant information is found, return empty lists for all keys. - If no relevant information is found, return empty lists for all keys.
- If a character name is not specified (e.g., "Your character"), use "Player Character". - If a character name is not specified (e.g., "Your character"), use "Player Character".
- Do not repeat lore if it is already known; only provide new or updated facts.
Strict Output Format: Strict Output Format:
Return a JSON object with exactly these keys: Return a JSON object with exactly these keys:
+48 -37
View File
@@ -23,7 +23,7 @@ from src.ui.tui import ConfirmationApp
# Configure logging to write to a file instead of stdout # Configure logging to write to a file instead of stdout
logging.basicConfig( logging.basicConfig(
level=logging.INFO, level=logging.DEBUG,
format="%(asctime)s [%(levelname)s] %(name)s: %(message)s", format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
handlers=[ handlers=[
logging.FileHandler("pipeline.log"), logging.FileHandler("pipeline.log"),
@@ -44,7 +44,7 @@ class PipelineOrchestrator:
# Modules # Modules
self.listener = AudioListener(loop=self.loop) self.listener = AudioListener(loop=self.loop)
self.transcriber = Transcriber(model_size="small") self.transcriber = Transcriber(model_size="base", device="cuda")
self.processor = LLMProcessor() self.processor = LLMProcessor()
self.rag_manager = RAGManager() self.rag_manager = RAGManager()
@@ -54,6 +54,7 @@ class PipelineOrchestrator:
self.clean_to_llm_queue = asyncio.Queue() self.clean_to_llm_queue = asyncio.Queue()
self.llm_to_ui_queue = asyncio.Queue() self.llm_to_ui_queue = asyncio.Queue()
self.log_queue = asyncio.Queue() self.log_queue = asyncio.Queue()
self.persistence_queue = asyncio.Queue()
self.is_running = False self.is_running = False
@@ -107,7 +108,9 @@ class PipelineOrchestrator:
full_audio = np.concatenate(self.audio_buffer) full_audio = np.concatenate(self.audio_buffer)
# Transcribe (WhisperX now returns a list of (speaker, text, start, end)) # Transcribe (WhisperX now returns a list of (speaker, text, start, end))
results = self.transcriber.transcribe(full_audio) results = await asyncio.to_thread(
self.transcriber.transcribe, full_audio
)
# Filter for only new segments that start after the last processed segment # Filter for only new segments that start after the last processed segment
new_segments = [ new_segments = [
@@ -184,8 +187,11 @@ class PipelineOrchestrator:
async def feed_ui(): async def feed_ui():
while self.is_running: while self.is_running:
try: try:
text = await self.ui_to_llm_queue.get() item = await self.ui_to_llm_queue.get()
await internal_queue.put(("UI", text)) if isinstance(item, (LoreUpdate, CharacterStateUpdate)):
await self.persistence_queue.put(item)
else:
await internal_queue.put(("UI", item))
except Exception as e: except Exception as e:
logger.error(f"LLM Feeder (UI) error: {e}") logger.error(f"LLM Feeder (UI) error: {e}")
@@ -213,20 +219,8 @@ class PipelineOrchestrator:
context=context, context=context,
) )
# Persistence: Lore Updates # Send the entire result to UI for confirmation
for lore_update in extraction_result.lore_updates: await self.llm_to_ui_queue.put(extraction_result)
file_path = await asyncio.to_thread(update_lore, lore_update)
await asyncio.to_thread(self.rag_manager.ingest_file, file_path)
logger.info(
f"LLM Worker: Lore updated and ingested into RAG: {lore_update.entity_name}"
)
# Persistence: Character State Updates
for char_update in extraction_result.character_updates:
await asyncio.to_thread(update_character_state, char_update)
logger.info(
f"LLM Worker: Character {char_update.character_name} state updated."
)
# UI Notification: Context Updates # UI Notification: Context Updates
for context_update in extraction_result.context_updates: for context_update in extraction_result.context_updates:
@@ -243,29 +237,32 @@ class PipelineOrchestrator:
for f in feeders: for f in feeders:
f.cancel() f.cancel()
def _get_wiki_context(self) -> str: async def persistence_worker(self):
""" """
Reads all files in the lore directory and returns them as a 저희 context string. Worker that handles persistence: Confirmed updates -> Disk & RAG.
""" """
from src.persistence.lore import DATA_LORE_DIR logger.info("Persistence Worker started.")
while self.is_running:
wiki_contents = []
# Recursively find all .md files in the lore directory
for path in DATA_LORE_DIR.rglob("*.md"):
try: try:
with open(path, "r", encoding="utf-8") as f: update = await self.persistence_queue.get()
content = f.read() if isinstance(update, LoreUpdate):
wiki_contents.append( file_path = await asyncio.to_thread(update_lore, update)
f"File: {path.relative_to(DATA_LORE_DIR)}\nContent:\n{content}" await asyncio.to_thread(self.rag_manager.ingest_file, file_path)
logger.info(
f"Persistence Worker: Lore updated and ingested into RAG: {update.entity_name}"
)
elif isinstance(update, CharacterStateUpdate):
await asyncio.to_thread(update_character_state, update)
logger.info(
f"Persistence Worker: Character {update.character_name} state updated."
) )
except Exception as e:
logger.error(f"Error reading wiki file {path}: {e}")
return ( if hasattr(self.persistence_queue, "task_done"):
"\n\n".join(wiki_contents) self.persistence_queue.task_done()
if wiki_contents except Exception as e:
else "No wiki knowledge available." logger.error(f"Persistence Worker error: {e}")
)
await asyncio.sleep(0.1)
async def tui_worker(self): async def tui_worker(self):
""" """
@@ -306,6 +303,7 @@ class PipelineOrchestrator:
asyncio.create_task(self.stt_worker()), asyncio.create_task(self.stt_worker()),
asyncio.create_task(self.clean_worker()), asyncio.create_task(self.clean_worker()),
asyncio.create_task(self.llm_worker()), asyncio.create_task(self.llm_worker()),
asyncio.create_task(self.persistence_worker()),
asyncio.create_task(self.tui_worker()), asyncio.create_task(self.tui_worker()),
] ]
@@ -330,3 +328,16 @@ class PipelineOrchestrator:
Stops. Stops.
""" """
self.is_running = False self.is_running = False
if __name__ == "__main__":
import asyncio
async def main():
loop = asyncio.get_event_loop()
orchestrator = PipelineOrchestrator(loop)
try:
await orchestrator.run()
except KeyboardInterrupt:
orchestrator.stop()
asyncio.run(main())
+11 -12
View File
@@ -16,7 +16,7 @@ from textual.widgets import (
Static, Static,
) )
from src.llm.models import CharacterStateUpdate, ExtractionResult, LoreUpdate from src.llm.models import CharacterStateUpdate, ContextUpdate, ExtractionResult, LoreUpdate
from src.persistence.characters import update_character_state from src.persistence.characters import update_character_state
from src.persistence.lore import update_lore from src.persistence.lore import update_lore
@@ -213,12 +213,13 @@ class ConfirmationApp(App):
while True: while True:
try: try:
update = await self.llm_to_ui_queue.get() update = await self.llm_to_ui_queue.get()
display_text = f"Query: {update.query}\nSource: {update.source}\n\n{update.snippet}" if isinstance(update, ExtractionResult):
context_list = self.query_one("#context-pane", ListView) self.handle_proposal_result(update)
# ListView.insert takes an *iterable* of ListItems; passing a elif isinstance(update, ContextUpdate):
# bare ListItem raises TypeError because ListItem is not iterable. display_text = f"Query: {update.query}\nSource: {update.source}\n\n{update.snippet}"
# Insert at the top to show most recent first. context_list = self.query_one("#context-pane", ListView)
await context_list.insert(0, [ListItem(Static(display_text))]) # Insert at the top to show most recent first.
await context_list.insert(0, [ListItem(Static(display_text))])
if hasattr(self.llm_to_ui_queue, "task_done"): if hasattr(self.llm_to_ui_queue, "task_done"):
self.llm_to_ui_queue.task_done() self.llm_to_ui_queue.task_done()
except Exception as e: except Exception as e:
@@ -263,17 +264,15 @@ class ConfirmationApp(App):
self.ui_to_llm_queue.put_nowait(text) self.ui_to_llm_queue.put_nowait(text)
input_widget.value = "" input_widget.value = ""
def action_accept(self) -> None: async def action_accept(self) -> None:
table = self.query_one("#pending-facts-table", DataTable) table = self.query_one("#pending-facts-table", DataTable)
row_index = table.cursor_row row_index = table.cursor_row
if row_index < 0 or row_index >= len(self.pending_updates): if row_index < 0 or row_index >= len(self.pending_updates):
return return
update = self.pending_updates[row_index] update = self.pending_updates[row_index]
if isinstance(update, LoreUpdate): if self.ui_to_llm_queue:
update_lore(update) self.ui_to_llm_queue.put_nowait(update)
elif isinstance(update, CharacterStateUpdate):
update_character_state(update)
self.remove_update(row_index) self.remove_update(row_index)