Refactor LLM processor and improve async handling
Move contextual information handling from noise filtering to extraction and centralize LLM call logic. Wrap blocking transcription and state update calls in asyncio.to_thread to prevent event loop blocking. Update transcriber model size to base.
This commit is contained in:
@@ -55,10 +55,6 @@ class ContextUpdate(BaseModel):
|
|||||||
|
|
||||||
|
|
||||||
class FilterResult(BaseModel):
|
class FilterResult(BaseModel):
|
||||||
contextual_info: str = Field(
|
|
||||||
...,
|
|
||||||
description="Information interesting to the user but not useful for structured extraction",
|
|
||||||
)
|
|
||||||
filtered_text: str = Field(
|
filtered_text: str = Field(
|
||||||
..., description="Cleaned transcript used for structured data extraction"
|
..., description="Cleaned transcript used for structured data extraction"
|
||||||
)
|
)
|
||||||
|
|||||||
+17
-28
@@ -61,6 +61,18 @@ class LLMProcessor:
|
|||||||
|
|
||||||
self.model = model or os.environ.get("LLM_MODEL", "gpt-4o")
|
self.model = model or os.environ.get("LLM_MODEL", "gpt-4o")
|
||||||
|
|
||||||
|
def _strip_markdown_code_blocks(self, content: str) -> str:
|
||||||
|
"""
|
||||||
|
Strips markdown code blocks (e.g., ```json ... ```) from the content.
|
||||||
|
"""
|
||||||
|
import re
|
||||||
|
|
||||||
|
# Remove opening and closing code blocks
|
||||||
|
content = re.sub(
|
||||||
|
r"^```(?:json)?\n?|```$", "", content, flags=re.MULTILINE
|
||||||
|
).strip()
|
||||||
|
return content
|
||||||
|
|
||||||
def _call_llm(
|
def _call_llm(
|
||||||
self,
|
self,
|
||||||
system_prompt: str,
|
system_prompt: str,
|
||||||
@@ -93,15 +105,7 @@ class LLMProcessor:
|
|||||||
)
|
)
|
||||||
content = response.choices[0].message.content
|
content = response.choices[0].message.content
|
||||||
|
|
||||||
# Strip markdown code blocks if present
|
return self._strip_markdown_code_blocks(content)
|
||||||
if content.startswith("```"):
|
|
||||||
import re
|
|
||||||
|
|
||||||
content = re.sub(
|
|
||||||
r"^```(?:json)?\n?|```$", "", content, flags=re.MULTILINE
|
|
||||||
).strip()
|
|
||||||
|
|
||||||
return content
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"LLM Error: {e}")
|
logger.error(f"LLM Error: {e}")
|
||||||
return ""
|
return ""
|
||||||
@@ -147,34 +151,19 @@ class LLMProcessor:
|
|||||||
"""
|
"""
|
||||||
logger.info(f"LLM Processor (Extract): Calling extraction for: {filtered_text}")
|
logger.info(f"LLM Processor (Extract): Calling extraction for: {filtered_text}")
|
||||||
try:
|
try:
|
||||||
# Using standard chat.completions.create with JSON mode for better compatibility with vLLM
|
|
||||||
logger.info("LLM Processor (Extract): Sending request to backend...")
|
|
||||||
|
|
||||||
system_prompt = EXTRACTION_SYSTEM_PROMPT
|
system_prompt = EXTRACTION_SYSTEM_PROMPT
|
||||||
if context:
|
if context:
|
||||||
system_prompt += f"\n{context}"
|
system_prompt += f"\n{context}"
|
||||||
|
|
||||||
messages = [
|
result = self._call_llm(
|
||||||
{"role": "system", "content": system_prompt},
|
system_prompt=system_prompt,
|
||||||
]
|
user_prompt=filtered_text,
|
||||||
messages.append({"role": "user", "content": filtered_text})
|
|
||||||
|
|
||||||
for message in messages:
|
|
||||||
logger.info(f"LLM Processor (Extract): Message: {message}")
|
|
||||||
|
|
||||||
response = self.client.chat.completions.create(
|
|
||||||
model=self.model,
|
|
||||||
messages=messages,
|
|
||||||
response_format={"type": "json_object"},
|
response_format={"type": "json_object"},
|
||||||
extra_body={"enable_thinking": False},
|
|
||||||
)
|
)
|
||||||
logger.info("LLM Processor (Extract): Response received from backend.")
|
|
||||||
|
|
||||||
import json
|
import json
|
||||||
|
|
||||||
content = response.choices[0].message.content
|
data = json.loads(result)
|
||||||
logger.info(f"LLM Processor (Extract): Raw JSON response: {content}")
|
|
||||||
data = json.loads(content)
|
|
||||||
|
|
||||||
# Map the JSON data to the Pydantic model
|
# Map the JSON data to the Pydantic model
|
||||||
return ExtractionResult(**data)
|
return ExtractionResult(**data)
|
||||||
|
|||||||
+2
-5
@@ -12,7 +12,6 @@ NOISE_FILTER_SYSTEM_PROMPT = """
|
|||||||
You are a D&D Game Master's assistant. Given a transcript, remove all out-of-character (OOC) chatter, logistical discussions (e.g., 'Where is my d20?'), and non-relevant noise.
|
You are a D&D Game Master's assistant. Given a transcript, remove all out-of-character (OOC) chatter, logistical discussions (e.g., 'Where is my d20?'), and non-relevant noise.
|
||||||
|
|
||||||
You must output your response as a JSON object with the following keys:
|
You must output your response as a JSON object with the following keys:
|
||||||
- "contextual_info": Information that is interesting or relevant to the story/session but doesn't fit into lore, character state, or significant events (e.g., flavor text, atmospheric descriptions, player commentary that adds context).
|
|
||||||
- "filtered_text": The cleaned transcript. IMPORTANT: Keep all player questions, requests for rule clarifications, and mentions of spells, NPCs, or locations in this field, as they are used to trigger knowledge base lookups.
|
- "filtered_text": The cleaned transcript. IMPORTANT: Keep all player questions, requests for rule clarifications, and mentions of spells, NPCs, or locations in this field, as they are used to trigger knowledge base lookups.
|
||||||
|
|
||||||
Keep the original speakers' names if they are present in the transcript.
|
Keep the original speakers' names if they are present in the transcript.
|
||||||
@@ -22,13 +21,11 @@ Do not add any commentary or summaries. Just filter the text.
|
|||||||
EXTRACTION_SYSTEM_PROMPT = """
|
EXTRACTION_SYSTEM_PROMPT = """
|
||||||
You are a D&D session analyzer. Your goal is to extract structured data from a filtered transcript.
|
You are a D&D session analyzer. Your goal is to extract structured data from a filtered transcript.
|
||||||
Extract any changes to character states (HP, status effects, inventory) and any new lore facts (NPCs, locations, world-building).
|
Extract any changes to character states (HP, status effects, inventory) and any new lore facts (NPCs, locations, world-building).
|
||||||
|
In addition to extracting updates to character state and lore, look for the opportunity to provide useful context,
|
||||||
DO NOT THINK.
|
such as the answer to a player's question or the resolution of a lore fact.
|
||||||
|
|
||||||
CONSTRAINTS:
|
CONSTRAINTS:
|
||||||
- OUTPUT ONLY VALID JSON.
|
- OUTPUT ONLY VALID JSON.
|
||||||
- DO NOT include any commentary, explanations, or "thought" blocks.
|
|
||||||
- DO NOT include any keys other than "lore", "character_state", and "events".
|
|
||||||
- If no relevant information is found, return empty lists for all keys.
|
- If no relevant information is found, return empty lists for all keys.
|
||||||
- If a character name is not specified (e.g., "Your character"), use "Player Character".
|
- If a character name is not specified (e.g., "Your character"), use "Player Character".
|
||||||
|
|
||||||
|
|||||||
@@ -44,7 +44,7 @@ class PipelineOrchestrator:
|
|||||||
|
|
||||||
# Modules
|
# Modules
|
||||||
self.listener = AudioListener(loop=self.loop)
|
self.listener = AudioListener(loop=self.loop)
|
||||||
self.transcriber = Transcriber(model_size="small")
|
self.transcriber = Transcriber(model_size="base")
|
||||||
self.processor = LLMProcessor()
|
self.processor = LLMProcessor()
|
||||||
self.rag_manager = RAGManager()
|
self.rag_manager = RAGManager()
|
||||||
|
|
||||||
@@ -107,7 +107,9 @@ class PipelineOrchestrator:
|
|||||||
full_audio = np.concatenate(self.audio_buffer)
|
full_audio = np.concatenate(self.audio_buffer)
|
||||||
|
|
||||||
# Transcribe (WhisperX now returns a list of (speaker, text, start, end))
|
# Transcribe (WhisperX now returns a list of (speaker, text, start, end))
|
||||||
results = self.transcriber.transcribe(full_audio)
|
results = await asyncio.to_thread(
|
||||||
|
self.transcriber.transcribe, full_audio
|
||||||
|
)
|
||||||
|
|
||||||
# Filter for only new segments that start after the last processed segment
|
# Filter for only new segments that start after the last processed segment
|
||||||
new_segments = [
|
new_segments = [
|
||||||
|
|||||||
+3
-3
@@ -263,7 +263,7 @@ class ConfirmationApp(App):
|
|||||||
self.ui_to_llm_queue.put_nowait(text)
|
self.ui_to_llm_queue.put_nowait(text)
|
||||||
input_widget.value = ""
|
input_widget.value = ""
|
||||||
|
|
||||||
def action_accept(self) -> None:
|
async def action_accept(self) -> None:
|
||||||
table = self.query_one("#pending-facts-table", DataTable)
|
table = self.query_one("#pending-facts-table", DataTable)
|
||||||
row_index = table.cursor_row
|
row_index = table.cursor_row
|
||||||
if row_index < 0 or row_index >= len(self.pending_updates):
|
if row_index < 0 or row_index >= len(self.pending_updates):
|
||||||
@@ -271,9 +271,9 @@ class ConfirmationApp(App):
|
|||||||
|
|
||||||
update = self.pending_updates[row_index]
|
update = self.pending_updates[row_index]
|
||||||
if isinstance(update, LoreUpdate):
|
if isinstance(update, LoreUpdate):
|
||||||
update_lore(update)
|
await asyncio.to_thread(update_lore, update)
|
||||||
elif isinstance(update, CharacterStateUpdate):
|
elif isinstance(update, CharacterStateUpdate):
|
||||||
update_character_state(update)
|
await asyncio.to_thread(update_character_state, update)
|
||||||
|
|
||||||
self.remove_update(row_index)
|
self.remove_update(row_index)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user