Improve audio capture and LLM integration

- Implement Silero VAD for dynamic audio chunking
- Add support for Ollama and vLLM backends
- Harden extraction prompts for strict JSON output
- Refactor TUI worker to handle proposals asynchronously
2026-05-26 19:51:48 -07:00
parent 60e170e777
commit 58bab75bb5
11 changed files with 290 additions and 78 deletions
@@ -22,10 +22,34 @@ class LLMProcessor:
        :param base_url: OpenAI-compatible base URL (e.g., for vLLM).
        :param model: The model to use for processing. If None, it looks for LLM_MODEL in environment variables.
        """
-        self.client = OpenAI(
-            api_key=api_key or os.environ.get("OPENAI_API_KEY"),
-            base_url=base_url or os.environ.get("OPENAI_BASE_URL"),
-        )
+        backend = os.environ.get("LLM_BACKEND", "openai").lower()
+
+        if backend == "ollama":
+            # Ollama's OpenAI-compatible API
+            final_base_url = base_url or "http://localhost:11434/v1"
+            final_api_key = api_key or "ollama"
+        elif backend == "vllm":
+            # Remote vLLM server
+            final_base_url = base_url or os.environ.get("OPENAI_BASE_URL")
+            final_api_key = api_key or os.environ.get("OPENAI_API_KEY")
+        else:  # default to openai
+            final_base_url = base_url or os.environ.get("OPENAI_BASE_URL")
+            final_api_key = api_key or os.environ.get("OPENAI_API_KEY")
+
+        try:
+            self.client = OpenAI(
+                api_key=final_api_key,
+                base_url=final_base_url,
+            )
+            # Simple connectivity check for local backends
+            if backend == "ollama":
+                # We can't easily check connectivity without making a call,
+                # but we can ensure the client is initialized.
+                pass
+        except Exception as e:
+            print(f"Error initializing LLM client for backend {backend}: {e}")
+            raise
+
        self.model = model or os.environ.get("LLM_MODEL", "gpt-4o")

    def _call_llm(
@@ -67,6 +91,7 @@ class LLMProcessor:
        print(f"LLM Processor (Extract): Calling extraction for: {filtered_text}")
        try:
            # Using standard chat.completions.create with JSON mode for better compatibility with vLLM
+            print("LLM Processor (Extract): Sending request to backend...")
            response = self.client.chat.completions.create(
                model=self.model,
                messages=[
@@ -76,6 +101,7 @@ class LLMProcessor:
                response_format={"type": "json_object"},
                extra_body={"include_reasoning": False},
            )
+            print("LLM Processor (Extract): Response received from backend.")

            import json

@@ -11,10 +11,51 @@ EXTRACTION_SYSTEM_PROMPT = """
 You are a D&D session analyzer. Your goal is to extract structured data from a filtered transcript.
 Extract any changes to character states (HP, status effects, inventory) and any new lore facts (NPCs, locations, world-building).

-Guidelines:
-1. Lore: Identify any new information about the world, people, and places.
-2. Character State: Look for mentions of damage, healing, or items being gained or lost.
-3. Events: Note significant plot developments.
+DO NOT THINK.

-Be precise. If no relevant information is found, return empty lists.
+CONSTRAINTS:
+- OUTPUT ONLY VALID JSON.
+- DO NOT include any commentary, explanations, or "thought" blocks.
+- DO NOT include any keys other than "lore", "character_state", and "events".
+- If no relevant information is found, return empty lists for all keys.
+- If a character name is not specified (e.g., "Your character"), use "Player Character".
+
+Strict Output Format:
+Return a JSON object with exactly these keys:
+1. "lore": A list of objects. Each object MUST have:
+   - "category": (string) 'NPC', 'Location', 'WorldBuilding', or 'Plot'
+   - "entity_name": (string) The name of the NPC, Location, or entity
+   - "content": (string) The actual lore fact or description
+2. "character_state": A list of objects. Each object MUST have:
+   - "character_name": (string) Name of the character
+   - "hp_change": (integer, optional) Change in HP
+   - "status_effects_added": (list of strings)
+   - "status_effects_removed": (list of strings)
+   - "inventory_changes": (list of objects with "item", "quantity", "action")
+3. "events": A list of strings. Each string should be a concise description of a significant plot development.
+
+Example Output:
+{
+  "lore": [
+    {
+      "category": "NPC",
+      "entity_name": "Thorne",
+      "content": "A gruff dwarf who runs the local tavern."
+    }
+  ],
+  "character_state": [
+    {
+      "character_name": "Grog",
+      "hp_change": -10,
+      "status_effects_added": [],
+      "status_effects_removed": [],
+      "inventory_changes": []
+    }
+  ],
+  "events": [
+    "The party discovered the secret entrance to the crypt."
+  ]
+}
+
+Be precise. Return only the JSON object.
 """