Small changes

Refactor LLM processor and improve async handling
Move contextual information handling from noise filtering to extraction and centralize LLM call logic. Wrap blocking transcription and state update calls in asyncio.to_thread to prevent event loop blocking. Update transcriber model size to base.
2026-05-28 22:08:00 -07:00 · 2026-05-28 18:54:09 -07:00
6 changed files with 66 additions and 85 deletions
@@ -1,7 +1,7 @@
 # D&D Helpers Configuration
 OPENAI_API_KEY=no-key-required
 OPENAI_BASE_URL=https://vllm.tipsy.codes/v1
-LLM_MODEL=Intel/gemma-4-31B-it-int4-AutoRound
+LLM_MODEL=google/gemma-4-26b-a4b-it
 #LLM_BACKEND=ollama
 #LLM_MODEL=gemma:2b
 WHISPER_MODEL=base
@@ -55,10 +55,6 @@ class ContextUpdate(BaseModel):


 class FilterResult(BaseModel):
-    contextual_info: str = Field(
-        ...,
-        description="Information interesting to the user but not useful for structured extraction",
-    )
    filtered_text: str = Field(
        ..., description="Cleaned transcript used for structured data extraction"
    )
@@ -61,6 +61,18 @@ class LLMProcessor:

        self.model = model or os.environ.get("LLM_MODEL", "gpt-4o")

+    def _strip_markdown_code_blocks(self, content: str) -> str:
+        """
+        Strips markdown code blocks (e.g., ```json ... ```) from the content.
+        """
+        import re
+
+        # Remove opening and closing code blocks
+        content = re.sub(
+            r"^```(?:json)?\n?|```$", "", content, flags=re.MULTILINE
+        ).strip()
+        return content
+
    def _call_llm(
        self,
        system_prompt: str,
@@ -93,15 +105,7 @@ class LLMProcessor:
            )
            content = response.choices[0].message.content

-            # Strip markdown code blocks if present
-            if content.startswith("```"):
-                import re
-
-                content = re.sub(
-                    r"^```(?:json)?\n?|```$", "", content, flags=re.MULTILINE
-                ).strip()
-
-            return content
+            return self._strip_markdown_code_blocks(content)
        except Exception as e:
            logger.error(f"LLM Error: {e}")
            return ""
@@ -147,34 +151,19 @@ class LLMProcessor:
        """
        logger.info(f"LLM Processor (Extract): Calling extraction for: {filtered_text}")
        try:
-            # Using standard chat.completions.create with JSON mode for better compatibility with vLLM
-            logger.info("LLM Processor (Extract): Sending request to backend...")
-
            system_prompt = EXTRACTION_SYSTEM_PROMPT
            if context:
                system_prompt += f"\n{context}"

-            messages = [
-                {"role": "system", "content": system_prompt},
-            ]
-            messages.append({"role": "user", "content": filtered_text})
-
-            for message in messages:
-                logger.info(f"LLM Processor (Extract): Message: {message}")
-
-            response = self.client.chat.completions.create(
-                model=self.model,
-                messages=messages,
+            result = self._call_llm(
+                system_prompt=system_prompt,
+                user_prompt=filtered_text,
                response_format={"type": "json_object"},
-                extra_body={"enable_thinking": False},
            )
-            logger.info("LLM Processor (Extract): Response received from backend.")

            import json

-            content = response.choices[0].message.content
-            logger.info(f"LLM Processor (Extract): Raw JSON response: {content}")
-            data = json.loads(content)
+            data = json.loads(result)

            # Map the JSON data to the Pydantic model
            return ExtractionResult(**data)
@@ -12,7 +12,6 @@ NOISE_FILTER_SYSTEM_PROMPT = """
 You are a D&D Game Master's assistant. Given a transcript, remove all out-of-character (OOC) chatter, logistical discussions (e.g., 'Where is my d20?'), and non-relevant noise.

 You must output your response as a JSON object with the following keys:
- "contextual_info": Information that is interesting or relevant to the story/session but doesn't fit into lore, character state, or significant events (e.g., flavor text, atmospheric descriptions, player commentary that adds context).
 - "filtered_text": The cleaned transcript. IMPORTANT: Keep all player questions, requests for rule clarifications, and mentions of spells, NPCs, or locations in this field, as they are used to trigger knowledge base lookups.

 Keep the original speakers' names if they are present in the transcript.
@@ -22,13 +21,11 @@ Do not add any commentary or summaries. Just filter the text.
 EXTRACTION_SYSTEM_PROMPT = """
 You are a D&D session analyzer. Your goal is to extract structured data from a filtered transcript.
 Extract any changes to character states (HP, status effects, inventory) and any new lore facts (NPCs, locations, world-building).
-
-DO NOT THINK.
+In addition extracting updates to character state and lore, look for the oppertunity to provide useful context,
+such as the answer to a player's question or the resolution of a lore fact.

 CONSTRAINTS:
 - OUTPUT ONLY VALID JSON.
- DO NOT include any commentary, explanations, or "thought" blocks.
- DO NOT include any keys other than "lore", "character_state", and "events".
 - If no relevant information is found, return empty lists for all keys.
 - If a character name is not specified (e.g., "Your character"), use "Player Character".

@@ -44,7 +44,7 @@ class PipelineOrchestrator:

        # Modules
        self.listener = AudioListener(loop=self.loop)
-        self.transcriber = Transcriber(model_size="small")
+        self.transcriber = Transcriber(model_size="base")
        self.processor = LLMProcessor()
        self.rag_manager = RAGManager()

@@ -54,6 +54,7 @@ class PipelineOrchestrator:
        self.clean_to_llm_queue = asyncio.Queue()
        self.llm_to_ui_queue = asyncio.Queue()
        self.log_queue = asyncio.Queue()
+        self.persistence_queue = asyncio.Queue()

        self.is_running = False

@@ -107,7 +108,9 @@ class PipelineOrchestrator:
                full_audio = np.concatenate(self.audio_buffer)

                # Transcribe (WhisperX now returns a list of (speaker, text, start, end))
-                results = self.transcriber.transcribe(full_audio)
+                results = await asyncio.to_thread(
+                    self.transcriber.transcribe, full_audio
+                )

                # Filter for only new segments that start after the last processed segment
                new_segments = [
@@ -184,8 +187,11 @@ class PipelineOrchestrator:
        async def feed_ui():
            while self.is_running:
                try:
-                    text = await self.ui_to_llm_queue.get()
-                    await internal_queue.put(("UI", text))
+                    item = await self.ui_to_llm_queue.get()
+                    if isinstance(item, (LoreUpdate, CharacterStateUpdate)):
+                        await self.persistence_queue.put(item)
+                    else:
+                        await internal_queue.put(("UI", item))
                except Exception as e:
                    logger.error(f"LLM Feeder (UI) error: {e}")

@@ -213,20 +219,8 @@ class PipelineOrchestrator:
                    context=context,
                )

-                # Persistence: Lore Updates
-                for lore_update in extraction_result.lore_updates:
-                    file_path = await asyncio.to_thread(update_lore, lore_update)
-                    await asyncio.to_thread(self.rag_manager.ingest_file, file_path)
-                    logger.info(
-                        f"LLM Worker: Lore updated and ingested into RAG: {lore_update.entity_name}"
-                    )
-
-                # Persistence: Character State Updates
-                for char_update in extraction_result.character_updates:
-                    await asyncio.to_thread(update_character_state, char_update)
-                    logger.info(
-                        f"LLM Worker: Character {char_update.character_name} state updated."
-                    )
+                # Send the entire result to UI for confirmation
+                await self.llm_to_ui_queue.put(extraction_result)

                # UI Notification: Context Updates
                for context_update in extraction_result.context_updates:
@@ -243,29 +237,32 @@ class PipelineOrchestrator:
        for f in feeders:
            f.cancel()

-    def _get_wiki_context(self) -> str:
+    async def persistence_worker(self):
        """
-        Reads all files in the lore directory and returns them as a 저희 context string.
+        Worker that handles persistence: Confirmed updates -> Disk & RAG.
        """
-        from src.persistence.lore import DATA_LORE_DIR
-
-        wiki_contents = []
-        # Recursively find all .md files in the lore directory
-        for path in DATA_LORE_DIR.rglob("*.md"):
+        logger.info("Persistence Worker started.")
+        while self.is_running:
            try:
-                with open(path, "r", encoding="utf-8") as f:
-                    content = f.read()
-                    wiki_contents.append(
-                        f"File: {path.relative_to(DATA_LORE_DIR)}\nContent:\n{content}"
+                update = await self.persistence_queue.get()
+                if isinstance(update, LoreUpdate):
+                    file_path = await asyncio.to_thread(update_lore, update)
+                    await asyncio.to_thread(self.rag_manager.ingest_file, file_path)
+                    logger.info(
+                        f"Persistence Worker: Lore updated and ingested into RAG: {update.entity_name}"
+                    )
+                elif isinstance(update, CharacterStateUpdate):
+                    await asyncio.to_thread(update_character_state, update)
+                    logger.info(
+                        f"Persistence Worker: Character {update.character_name} state updated."
                    )
-            except Exception as e:
-                logger.error(f"Error reading wiki file {path}: {e}")

-        return (
-            "\n\n".join(wiki_contents)
-            if wiki_contents
-            else "No wiki knowledge available."
-        )
+                if hasattr(self.persistence_queue, "task_done"):
+                    self.persistence_queue.task_done()
+            except Exception as e:
+                logger.error(f"Persistence Worker error: {e}")
+
+            await asyncio.sleep(0.1)

    async def tui_worker(self):
        """
@@ -306,6 +303,7 @@ class PipelineOrchestrator:
            asyncio.create_task(self.stt_worker()),
            asyncio.create_task(self.clean_worker()),
            asyncio.create_task(self.llm_worker()),
+            asyncio.create_task(self.persistence_worker()),
            asyncio.create_task(self.tui_worker()),
        ]

@@ -213,6 +213,9 @@ class ConfirmationApp(App):
        while True:
            try:
                update = await self.llm_to_ui_queue.get()
+                if isinstance(update, ExtractionResult):
+                    self.handle_proposal_result(update)
+                elif isinstance(update, ContextUpdate):
                    display_text = f"Query: {update.query}\nSource: {update.source}\n\n{update.snippet}"
                    context_list = self.query_one("#context-pane", ListView)
                    # ListView.insert takes an *iterable* of ListItems; passing a
@@ -263,17 +266,15 @@ class ConfirmationApp(App):
            self.ui_to_llm_queue.put_nowait(text)
            input_widget.value = ""

-    def action_accept(self) -> None:
+    async def action_accept(self) -> None:
        table = self.query_one("#pending-facts-table", DataTable)
        row_index = table.cursor_row
        if row_index < 0 or row_index >= len(self.pending_updates):
            return

        update = self.pending_updates[row_index]
-        if isinstance(update, LoreUpdate):
-            update_lore(update)
-        elif isinstance(update, CharacterStateUpdate):
-            update_character_state(update)
+        if self.ui_to_llm_queue:
+            self.ui_to_llm_queue.put_nowait(update)

        self.remove_update(row_index)