diff --git a/.env b/.env
index eb21030..2a76d88 100644
--- a/.env
+++ b/.env
@@ -1,5 +1,6 @@
 # D&D Helpers Configuration
-OPENAI_API_KEY=your_api_key_here
-LLM_MODEL=gpt-4o
+OPENAI_API_KEY=no-key-required
+OPENAI_BASE_URL=https://vllm.tipsy.codes/v1
+LLM_MODEL=Intel/gemma-4-31B-it-int4-AutoRound
 WHISPER_MODEL=base
 AUDIO_DEVICE_ID=None
diff --git a/.gitignore b/.gitignore
index d4f588e..86eb1c0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,2 @@
 artifacts/
+__pycache__
diff --git a/src/llm/__pycache__/models.cpython-314.pyc b/src/llm/__pycache__/models.cpython-314.pyc
index caf94d1..2dab234 100644
Binary files a/src/llm/__pycache__/models.cpython-314.pyc and b/src/llm/__pycache__/models.cpython-314.pyc differ
diff --git a/src/llm/__pycache__/processor.cpython-314.pyc b/src/llm/__pycache__/processor.cpython-314.pyc
index fbe0f9f..ee70c98 100644
Binary files a/src/llm/__pycache__/processor.cpython-314.pyc and b/src/llm/__pycache__/processor.cpython-314.pyc differ
diff --git a/src/llm/models.py b/src/llm/models.py
index d11ee0f..72256f8 100644
--- a/src/llm/models.py
+++ b/src/llm/models.py
@@ -46,11 +46,18 @@ class CharacterStateUpdate(BaseModel):
 
 class ExtractionResult(BaseModel):
     lore_updates: List[LoreUpdate] = Field(
-        default_factory=list, description="List of discovered lore facts"
+        default_factory=list, description="List of discovered lore facts", alias="lore"
     )
     character_updates: List[CharacterStateUpdate] = Field(
-        default_factory=list, description="List of character state changes"
+        default_factory=list,
+        description="List of character state changes",
+        alias="character_state",
     )
     significant_events: List[str] = Field(
-        default_factory=list, description="List of significant plot points or events"
+        default_factory=list,
+        description="List of significant plot points or events",
+        alias="events",
     )
+
+    class Config:
+        populate_by_name = True
diff --git a/src/llm/processor.py b/src/llm/processor.py
index 2036508..df69d56 100644
--- a/src/llm/processor.py
+++ b/src/llm/processor.py
@@ -13,20 +13,20 @@ class LLMProcessor:
         self,
         api_key: Optional[str] = None,
         base_url: Optional[str] = None,
-        model: str = "gpt-4o",
+        model: Optional[str] = None,
     ):
         """
         Initializes the LLMProcessor.
 
         :param api_key: OpenAI API key. If None, it looks for OPENAI_API_KEY in environment variables.
         :param base_url: OpenAI-compatible base URL (e.g., for vLLM).
-        :param model: The model to use for processing.
+        :param model: The model to use for processing. If None, it reads LLM_MODEL from the environment, falling back to "gpt-4o".
         """
         self.client = OpenAI(
             api_key=api_key or os.environ.get("OPENAI_API_KEY"),
             base_url=base_url or os.environ.get("OPENAI_BASE_URL"),
         )
-        self.model = model
+        self.model = model or os.environ.get("LLM_MODEL", "gpt-4o")
 
     def _call_llm(
         self,
@@ -45,6 +45,7 @@ class LLMProcessor:
                     {"role": "user", "content": user_prompt},
                 ],
                 response_format=response_format,
+                extra_body={"include_reasoning": False},
             )
             return response.choices[0].message.content
         except Exception as e:
@@ -55,27 +56,36 @@ class LLMProcessor:
         """
         Stage 1: Raw Transcript -> Filtered Text.
         """
-        return self._call_llm(NOISE_FILTER_SYSTEM_PROMPT, text)
+        result = self._call_llm(NOISE_FILTER_SYSTEM_PROMPT, text)
+        print(f"LLM Processor (Filter): {text} -> {result}")
+        return result
 
     def extract_structured_data(self, filtered_text: str) -> ExtractionResult:
         """
         Stage 2: Filtered Text -> Structured Data.
         """
-        # We use OpenAI's structured output (JSON mode/tool calling) via Pydantic's response_format.
-        # For models that support it, we can pass the Pydantic model directly.
-        # If we are using an older model or vLLM, we might need to manually parse the JSON.
-
-        # Using the newer 'beta.chat.completions.parse' for Pydantic support
+        print(f"LLM Processor (Extract): Calling extraction for: {filtered_text}")
         try:
-            completion = self.client.beta.chat.completions.parse(
+            # Using standard chat.completions.create with JSON mode for better compatibility with vLLM
+            response = self.client.chat.completions.create(
                 model=self.model,
                 messages=[
                     {"role": "system", "content": EXTRACTION_SYSTEM_PROMPT},
                     {"role": "user", "content": filtered_text},
                 ],
-                response_format=ExtractionResult,
+                response_format={"type": "json_object"},
+                extra_body={"include_reasoning": False},
             )
-            return completion.choices[0].message.parsed
+
+            import json
+
+            content = response.choices[0].message.content
+            print(f"LLM Processor (Extract): Raw JSON response: {content}")
+            data = json.loads(content)
+
+            # Map the JSON data to the Pydantic model
+            return ExtractionResult(**data)
+
         except Exception as e:
             print(f"Extraction Error: {e}")
             # Return an empty ExtractionResult if parsing fails
diff --git a/src/pipeline/__pycache__/orchestrator.cpython-314.pyc b/src/pipeline/__pycache__/orchestrator.cpython-314.pyc
index deb8141..94c2e6c 100644
Binary files a/src/pipeline/__pycache__/orchestrator.cpython-314.pyc and b/src/pipeline/__pycache__/orchestrator.cpython-314.pyc differ
diff --git a/src/pipeline/orchestrator.py b/src/pipeline/orchestrator.py
index 17919ec..b5f54a2 100644
--- a/src/pipeline/orchestrator.py
+++ b/src/pipeline/orchestrator.py
@@ -59,9 +59,10 @@ class PipelineOrchestrator:
                 # Get raw text from transcript queue
                 raw_text = await self.transcript_queue.get()
 
-                logger.info(f"Processing text: {raw_text}")
+                logger.info(f"LLM Worker: Processing text: {raw_text}")
 
                 # Process via LLM (Filter -> Extract)
+                # Note: process_pipeline is a synchronous call, so it blocks the event loop until it returns.
                 result = self.processor.process_pipeline(raw_text)
 
                 if (
@@ -69,10 +70,12 @@ class PipelineOrchestrator:
                     or result.character_updates
                     or result.significant_events
                 ):
-                    logger.info("Proposal generated. Putting into proposal queue.")
+                    logger.info(
+                        f"LLM Worker: Proposal generated. Putting into proposal queue. (Lore: {len(result.lore_updates)}, Char: {len(result.character_updates)})"
+                    )
                     await self.proposal_queue.put(result)
                 else:
-                    logger.info("No relevant game data extracted.")
+                    logger.info("LLM Worker: No relevant game data extracted.")
 
             except Exception as e:
                 logger.error(f"LLM Worker error: {e}")
diff --git a/src/stt/__pycache__/listener.cpython-314.pyc b/src/stt/__pycache__/listener.cpython-314.pyc
index 30fb0aa..006facc 100644
Binary files a/src/stt/__pycache__/listener.cpython-314.pyc and b/src/stt/__pycache__/listener.cpython-314.pyc differ
diff --git a/src/stt/__pycache__/transcriber.cpython-314.pyc b/src/stt/__pycache__/transcriber.cpython-314.pyc
index 16fe8f7..8936e12 100644
Binary files a/src/stt/__pycache__/transcriber.cpython-314.pyc and b/src/stt/__pycache__/transcriber.cpython-314.pyc differ
diff --git a/src/stt/listener.py b/src/stt/listener.py
index 07a0f3c..16665ed 100644
--- a/src/stt/listener.py
+++ b/src/stt/listener.py
@@ -41,6 +41,9 @@ class AudioListener:
             target_samples = int(self.sample_rate * self.chunk_duration)
             chunk = chunk[:target_samples]
 
+            # Flatten to 1D array (samples,) as expected by faster-whisper
+            chunk = chunk.flatten()
+
             # Use call_soon_threadsafe to put the chunk into the asyncio queue from the callback thread
             self.loop.call_soon_threadsafe(self.audio_queue.put_nowait, chunk)
             self._buffer = []
diff --git a/src/stt/transcriber.py b/src/stt/transcriber.py
index ce96c33..188bbe8 100644
--- a/src/stt/transcriber.py
+++ b/src/stt/transcriber.py
@@ -46,8 +46,8 @@ class Transcriber:
             return ""
 
         try:
-            # faster-whisper expects audio in float32
-            audio_data = audio_chunk.astype("float32")
+            # faster-whisper expects the audio as a 1D float32 array
+            audio_data = audio_chunk.astype("float32").flatten()
 
             # Transcribe the audio
             segments, info = self.model.transcribe(audio_data, beam_size=5)
diff --git a/src/ui/__pycache__/cli.cpython-314.pyc b/src/ui/__pycache__/cli.cpython-314.pyc
index cd84994..164d568 100644
Binary files a/src/ui/__pycache__/cli.cpython-314.pyc and b/src/ui/__pycache__/cli.cpython-314.pyc differ
diff --git a/src/ui/cli.py b/src/ui/cli.py
index 7fcb6c1..d1075a9 100644
--- a/src/ui/cli.py
+++ b/src/ui/cli.py
@@ -2,11 +2,14 @@ import asyncio
 from typing import List
 
 import typer
+from dotenv import load_dotenv
 
 from src.llm.models import CharacterStateUpdate, ExtractionResult, LoreUpdate
 from src.pipeline.orchestrator import PipelineOrchestrator
 from src.ui.tui import ConfirmationApp
 
+load_dotenv()
+
 app = typer.Typer(help="D&D Helpers CLI")
 
 
@@ -17,14 +20,15 @@ def run():
     """
     typer.echo("Starting D&D Helpers pipeline...")
 
-    loop = asyncio.get_event_loop()
-    orchestrator = PipelineOrchestrator(loop=loop)
+    async def main():
+        loop = asyncio.get_running_loop()
+        orchestrator = PipelineOrchestrator(loop=loop)
+        await orchestrator.run()
 
     try:
-        loop.run_until_complete(orchestrator.run())
+        asyncio.run(main())
     except KeyboardInterrupt:
-        orchestrator.stop()
-        loop.run_until_complete(asyncio.sleep(0))  # Give it a moment to cleanup
+        pass
 
     typer.echo("Pipeline stopped.")