diff --git a/.env b/.env index eb21030..2a76d88 100644 --- a/.env +++ b/.env @@ -1,5 +1,6 @@ # D&D Helpers Configuration -OPENAI_API_KEY=your_api_key_here -LLM_MODEL=gpt-4o +OPENAI_API_KEY=no-key-required +OPENAI_BASE_URL=https://vllm.tipsy.codes/v1 +LLM_MODEL=Intel/gemma-4-31B-it-int4-AutoRound WHISPER_MODEL=base AUDIO_DEVICE_ID=None diff --git a/.gitignore b/.gitignore index d4f588e..86eb1c0 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ artifacts/ +__pycache__ diff --git a/src/llm/__pycache__/models.cpython-314.pyc b/src/llm/__pycache__/models.cpython-314.pyc index caf94d1..2dab234 100644 Binary files a/src/llm/__pycache__/models.cpython-314.pyc and b/src/llm/__pycache__/models.cpython-314.pyc differ diff --git a/src/llm/__pycache__/processor.cpython-314.pyc b/src/llm/__pycache__/processor.cpython-314.pyc index fbe0f9f..ee70c98 100644 Binary files a/src/llm/__pycache__/processor.cpython-314.pyc and b/src/llm/__pycache__/processor.cpython-314.pyc differ diff --git a/src/llm/models.py b/src/llm/models.py index d11ee0f..72256f8 100644 --- a/src/llm/models.py +++ b/src/llm/models.py @@ -46,11 +46,18 @@ class CharacterStateUpdate(BaseModel): class ExtractionResult(BaseModel): lore_updates: List[LoreUpdate] = Field( - default_factory=list, description="List of discovered lore facts" + default_factory=list, description="List of discovered lore facts", alias="lore" ) character_updates: List[CharacterStateUpdate] = Field( - default_factory=list, description="List of character state changes" + default_factory=list, + description="List of character state changes", + alias="character_state", ) significant_events: List[str] = Field( - default_factory=list, description="List of significant plot points or events" + default_factory=list, + description="List of significant plot points or events", + alias="events", ) + + class Config: + populate_by_name = True diff --git a/src/llm/processor.py b/src/llm/processor.py index 2036508..df69d56 100644 --- a/src/llm/processor.py +++ b/src/llm/processor.py @@ -13,20 +13,20 @@ class LLMProcessor: self, api_key: Optional[str] = None, base_url: Optional[str] = None, - model: str = "gpt-4o", + model: Optional[str] = None, ): """ Initializes the LLMProcessor. :param api_key: OpenAI API key. If None, it looks for OPENAI_API_KEY in environment variables. :param base_url: OpenAI-compatible base URL (e.g., for vLLM). - :param model: The model to use for processing. + :param model: The model to use for processing. If None, it looks for LLM_MODEL in environment variables. """ self.client = OpenAI( api_key=api_key or os.environ.get("OPENAI_API_KEY"), base_url=base_url or os.environ.get("OPENAI_BASE_URL"), ) - self.model = model + self.model = model or os.environ.get("LLM_MODEL", "gpt-4o") def _call_llm( self, @@ -45,6 +45,7 @@ class LLMProcessor: {"role": "user", "content": user_prompt}, ], response_format=response_format, + extra_body={"include_reasoning": False}, ) return response.choices[0].message.content except Exception as e: @@ -55,27 +56,36 @@ class LLMProcessor: """ Stage 1: Raw Transcript -> Filtered Text. """ - return self._call_llm(NOISE_FILTER_SYSTEM_PROMPT, text) + result = self._call_llm(NOISE_FILTER_SYSTEM_PROMPT, text) + print(f"LLM Processor (Filter): {text} -> {result}") + return result def extract_structured_data(self, filtered_text: str) -> ExtractionResult: """ Stage 2: Filtered Text -> Structured Data. """ - # We use OpenAI's structured output (JSON mode/tool calling) via Pydantic's response_format. - # For models that support it, we can pass the Pydantic model directly. - # If we are using an older model or vLLM, we might need to manually parse the JSON. - - # Using the newer 'beta.chat.completions.parse' for Pydantic support + print(f"LLM Processor (Extract): Calling extraction for: {filtered_text}") try: - completion = self.client.beta.chat.completions.parse( + # Using standard chat.completions.create with JSON mode for better compatibility with vLLM + response = self.client.chat.completions.create( model=self.model, messages=[ {"role": "system", "content": EXTRACTION_SYSTEM_PROMPT}, {"role": "user", "content": filtered_text}, ], - response_format=ExtractionResult, + response_format={"type": "json_object"}, + extra_body={"include_reasoning": False}, ) - return completion.choices[0].message.parsed + + import json + + content = response.choices[0].message.content + print(f"LLM Processor (Extract): Raw JSON response: {content}") + data = json.loads(content) + + # Map the JSON data to the Pydantic model + return ExtractionResult(**data) + except Exception as e: print(f"Extraction Error: {e}") # Return an empty ExtractionResult if parsing fails diff --git a/src/pipeline/__pycache__/orchestrator.cpython-314.pyc b/src/pipeline/__pycache__/orchestrator.cpython-314.pyc index deb8141..94c2e6c 100644 Binary files a/src/pipeline/__pycache__/orchestrator.cpython-314.pyc and b/src/pipeline/__pycache__/orchestrator.cpython-314.pyc differ diff --git a/src/pipeline/orchestrator.py b/src/pipeline/orchestrator.py index 17919ec..b5f54a2 100644 --- a/src/pipeline/orchestrator.py +++ b/src/pipeline/orchestrator.py @@ -59,9 +59,10 @@ class PipelineOrchestrator: # Get raw text from transcript queue raw_text = await self.transcript_queue.get() - logger.info(f"Processing text: {raw_text}") + logger.info(f"LLM Worker: Processing text: {raw_text}") # Process via LLM (Filter -> Extract) + # Note: this is currently a synchronous call, which blocks the loop. result = self.processor.process_pipeline(raw_text) if ( @@ -69,10 +70,12 @@ class PipelineOrchestrator: or result.character_updates or result.significant_events ): - logger.info("Proposal generated. Putting into proposal queue.") + logger.info( + f"LLM Worker: Proposal generated. Putting into proposal queue. (Lore: {len(result.lore_updates)}, Char: {len(result.character_updates)})" + ) await self.proposal_queue.put(result) else: - logger.info("No relevant game data extracted.") + logger.info("LLM Worker: No relevant game data extracted.") except Exception as e: logger.error(f"LLM Worker error: {e}") diff --git a/src/stt/__pycache__/listener.cpython-314.pyc b/src/stt/__pycache__/listener.cpython-314.pyc index 30fb0aa..006facc 100644 Binary files a/src/stt/__pycache__/listener.cpython-314.pyc and b/src/stt/__pycache__/listener.cpython-314.pyc differ diff --git a/src/stt/__pycache__/transcriber.cpython-314.pyc b/src/stt/__pycache__/transcriber.cpython-314.pyc index 16fe8f7..8936e12 100644 Binary files a/src/stt/__pycache__/transcriber.cpython-314.pyc and b/src/stt/__pycache__/transcriber.cpython-314.pyc differ diff --git a/src/stt/listener.py b/src/stt/listener.py index 07a0f3c..16665ed 100644 --- a/src/stt/listener.py +++ b/src/stt/listener.py @@ -41,6 +41,9 @@ class AudioListener: target_samples = int(self.sample_rate * self.chunk_duration) chunk = chunk[:target_samples] + # Flatten to 1D array (samples,) as expected by faster-whisper + chunk = chunk.flatten() + # Use call_soon_threadsafe to put the chunk into the asyncio queue from the callback thread self.loop.call_soon_threadsafe(self.audio_queue.put_nowait, chunk) self._buffer = [] diff --git a/src/stt/transcriber.py b/src/stt/transcriber.py index ce96c33..188bbe8 100644 --- a/src/stt/transcriber.py +++ b/src/stt/transcriber.py @@ -46,8 +46,8 @@ class Transcriber: return "" try: - # faster-whisper expects audio in float32 - audio_data = audio_chunk.astype("float32") + # faster-whisper expects audio in float32 and 1D array + audio_data = audio_chunk.astype("float32").flatten() # Transcribe the audio segments, info = self.model.transcribe(audio_data, beam_size=5) diff --git a/src/ui/__pycache__/cli.cpython-314.pyc b/src/ui/__pycache__/cli.cpython-314.pyc index cd84994..164d568 100644 Binary files a/src/ui/__pycache__/cli.cpython-314.pyc and b/src/ui/__pycache__/cli.cpython-314.pyc differ diff --git a/src/ui/cli.py b/src/ui/cli.py index 7fcb6c1..d1075a9 100644 --- a/src/ui/cli.py +++ b/src/ui/cli.py @@ -2,11 +2,14 @@ import asyncio from typing import List import typer +from dotenv import load_dotenv from src.llm.models import CharacterStateUpdate, ExtractionResult, LoreUpdate from src.pipeline.orchestrator import PipelineOrchestrator from src.ui.tui import ConfirmationApp +load_dotenv() + app = typer.Typer(help="D&D Helpers CLI") @@ -17,14 +20,15 @@ def run(): """ typer.echo("Starting D&D Helpers pipeline...") - loop = asyncio.get_event_loop() - orchestrator = PipelineOrchestrator(loop=loop) + async def main(): + loop = asyncio.get_running_loop() + orchestrator = PipelineOrchestrator(loop=loop) + await orchestrator.run() try: - loop.run_until_complete(orchestrator.run()) + asyncio.run(main()) except KeyboardInterrupt: - orchestrator.stop() - loop.run_until_complete(asyncio.sleep(0)) # Give it a moment to cleanup + pass typer.echo("Pipeline stopped.")