Refactor LLM processor and improve async handling
Move contextual information handling from noise filtering to extraction and centralize LLM call logic. Wrap blocking transcription and state update calls in asyncio.to_thread to prevent event loop blocking. Update transcriber model size to base.
This commit is contained in:
@@ -55,10 +55,6 @@ class ContextUpdate(BaseModel):
|
||||
|
||||
|
||||
class FilterResult(BaseModel):
|
||||
contextual_info: str = Field(
|
||||
...,
|
||||
description="Information interesting to the user but not useful for structured extraction",
|
||||
)
|
||||
filtered_text: str = Field(
|
||||
..., description="Cleaned transcript used for structured data extraction"
|
||||
)
|
||||
|
||||
+17
-28
@@ -61,6 +61,18 @@ class LLMProcessor:
|
||||
|
||||
self.model = model or os.environ.get("LLM_MODEL", "gpt-4o")
|
||||
|
||||
def _strip_markdown_code_blocks(self, content: str) -> str:
|
||||
"""
|
||||
Strips markdown code blocks (e.g., ```json ... ```) from the content.
|
||||
"""
|
||||
import re
|
||||
|
||||
# Remove opening and closing code blocks
|
||||
content = re.sub(
|
||||
r"^```(?:json)?\n?|```$", "", content, flags=re.MULTILINE
|
||||
).strip()
|
||||
return content
|
||||
|
||||
def _call_llm(
|
||||
self,
|
||||
system_prompt: str,
|
||||
@@ -93,15 +105,7 @@ class LLMProcessor:
|
||||
)
|
||||
content = response.choices[0].message.content
|
||||
|
||||
# Strip markdown code blocks if present
|
||||
if content.startswith("```"):
|
||||
import re
|
||||
|
||||
content = re.sub(
|
||||
r"^```(?:json)?\n?|```$", "", content, flags=re.MULTILINE
|
||||
).strip()
|
||||
|
||||
return content
|
||||
return self._strip_markdown_code_blocks(content)
|
||||
except Exception as e:
|
||||
logger.error(f"LLM Error: {e}")
|
||||
return ""
|
||||
@@ -147,34 +151,19 @@ class LLMProcessor:
|
||||
"""
|
||||
logger.info(f"LLM Processor (Extract): Calling extraction for: {filtered_text}")
|
||||
try:
|
||||
# Using standard chat.completions.create with JSON mode for better compatibility with vLLM
|
||||
logger.info("LLM Processor (Extract): Sending request to backend...")
|
||||
|
||||
system_prompt = EXTRACTION_SYSTEM_PROMPT
|
||||
if context:
|
||||
system_prompt += f"\n{context}"
|
||||
|
||||
messages = [
|
||||
{"role": "system", "content": system_prompt},
|
||||
]
|
||||
messages.append({"role": "user", "content": filtered_text})
|
||||
|
||||
for message in messages:
|
||||
logger.info(f"LLM Processor (Extract): Message: {message}")
|
||||
|
||||
response = self.client.chat.completions.create(
|
||||
model=self.model,
|
||||
messages=messages,
|
||||
result = self._call_llm(
|
||||
system_prompt=system_prompt,
|
||||
user_prompt=filtered_text,
|
||||
response_format={"type": "json_object"},
|
||||
extra_body={"enable_thinking": False},
|
||||
)
|
||||
logger.info("LLM Processor (Extract): Response received from backend.")
|
||||
|
||||
import json
|
||||
|
||||
content = response.choices[0].message.content
|
||||
logger.info(f"LLM Processor (Extract): Raw JSON response: {content}")
|
||||
data = json.loads(content)
|
||||
data = json.loads(result)
|
||||
|
||||
# Map the JSON data to the Pydantic model
|
||||
return ExtractionResult(**data)
|
||||
|
||||
+2
-5
@@ -12,7 +12,6 @@ NOISE_FILTER_SYSTEM_PROMPT = """
|
||||
You are a D&D Game Master's assistant. Given a transcript, remove all out-of-character (OOC) chatter, logistical discussions (e.g., 'Where is my d20?'), and non-relevant noise.
|
||||
|
||||
You must output your response as a JSON object with the following keys:
|
||||
- "contextual_info": Information that is interesting or relevant to the story/session but doesn't fit into lore, character state, or significant events (e.g., flavor text, atmospheric descriptions, player commentary that adds context).
|
||||
- "filtered_text": The cleaned transcript. IMPORTANT: Keep all player questions, requests for rule clarifications, and mentions of spells, NPCs, or locations in this field, as they are used to trigger knowledge base lookups.
|
||||
|
||||
Keep the original speakers' names if they are present in the transcript.
|
||||
@@ -22,13 +21,11 @@ Do not add any commentary or summaries. Just filter the text.
|
||||
EXTRACTION_SYSTEM_PROMPT = """
|
||||
You are a D&D session analyzer. Your goal is to extract structured data from a filtered transcript.
|
||||
Extract any changes to character states (HP, status effects, inventory) and any new lore facts (NPCs, locations, world-building).
|
||||
|
||||
DO NOT THINK.
|
||||
In addition to extracting updates to character state and lore, look for the opportunity to provide useful context,
|
||||
such as the answer to a player's question or the resolution of a lore fact.
|
||||
|
||||
CONSTRAINTS:
|
||||
- OUTPUT ONLY VALID JSON.
|
||||
- DO NOT include any commentary, explanations, or "thought" blocks.
|
||||
- DO NOT include any keys other than "lore", "character_state", and "events".
|
||||
- If no relevant information is found, return empty lists for all keys.
|
||||
- If a character name is not specified (e.g., "Your character"), use "Player Character".
|
||||
|
||||
|
||||
@@ -44,7 +44,7 @@ class PipelineOrchestrator:
|
||||
|
||||
# Modules
|
||||
self.listener = AudioListener(loop=self.loop)
|
||||
self.transcriber = Transcriber(model_size="small")
|
||||
self.transcriber = Transcriber(model_size="base")
|
||||
self.processor = LLMProcessor()
|
||||
self.rag_manager = RAGManager()
|
||||
|
||||
@@ -107,7 +107,9 @@ class PipelineOrchestrator:
|
||||
full_audio = np.concatenate(self.audio_buffer)
|
||||
|
||||
# Transcribe (WhisperX now returns a list of (speaker, text, start, end))
|
||||
results = self.transcriber.transcribe(full_audio)
|
||||
results = await asyncio.to_thread(
|
||||
self.transcriber.transcribe, full_audio
|
||||
)
|
||||
|
||||
# Filter for only new segments that start after the last processed segment
|
||||
new_segments = [
|
||||
|
||||
+3
-3
@@ -263,7 +263,7 @@ class ConfirmationApp(App):
|
||||
self.ui_to_llm_queue.put_nowait(text)
|
||||
input_widget.value = ""
|
||||
|
||||
def action_accept(self) -> None:
|
||||
async def action_accept(self) -> None:
|
||||
table = self.query_one("#pending-facts-table", DataTable)
|
||||
row_index = table.cursor_row
|
||||
if row_index < 0 or row_index >= len(self.pending_updates):
|
||||
@@ -271,9 +271,9 @@ class ConfirmationApp(App):
|
||||
|
||||
update = self.pending_updates[row_index]
|
||||
if isinstance(update, LoreUpdate):
|
||||
update_lore(update)
|
||||
await asyncio.to_thread(update_lore, update)
|
||||
elif isinstance(update, CharacterStateUpdate):
|
||||
update_character_state(update)
|
||||
await asyncio.to_thread(update_character_state, update)
|
||||
|
||||
self.remove_update(row_index)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user