diff --git a/.env b/.env index 8a4cd96..ba51b07 100644 --- a/.env +++ b/.env @@ -2,6 +2,7 @@ OPENAI_API_KEY=no-key-required OPENAI_BASE_URL=https://vllm.tipsy.codes/v1 LLM_MODEL=google/gemma-4-26b-a4b-it +LLM_BACKEND=vllm #LLM_BACKEND=ollama #LLM_MODEL=gemma:2b WHISPER_MODEL=base diff --git a/requirements.txt b/requirements.txt index 3554082..2ed3ded 100644 --- a/requirements.txt +++ b/requirements.txt @@ -9,3 +9,5 @@ python-dotenv llama-index chromadb pdfplumber +llama-index-embeddings-huggingface +llama-index-vector_stores-chroma diff --git a/src/llm/processor.py b/src/llm/processor.py index f79fc41..04c329c 100644 --- a/src/llm/processor.py +++ b/src/llm/processor.py @@ -45,6 +45,7 @@ class LLMProcessor: final_base_url = base_url or os.environ.get("OPENAI_BASE_URL") final_api_key = api_key or os.environ.get("OPENAI_API_KEY") + logger.info(f"Using LLM backend: {backend}") try: self.client = OpenAI( api_key=final_api_key, @@ -96,6 +97,14 @@ class LLMProcessor: messages.append({"role": "user", "content": user_prompt}) + # Debugging: Dump inputs + logger.debug("--- LLM CALL START ---") + logger.debug(f"Model: {self.model}") + logger.debug(f"Messages: {messages}") + if response_format: + logger.debug(f"Response Format: {response_format}") + logger.debug("--- LLM CALL END ---") + try: response = self.client.chat.completions.create( model=self.model, @@ -105,6 +114,11 @@ class LLMProcessor: ) content = response.choices[0].message.content + # Debugging: Dump outputs + logger.debug("--- LLM RESPONSE START ---") + logger.debug(f"Content: {content}") + logger.debug("--- LLM RESPONSE END ---") + return self._strip_markdown_code_blocks(content) except Exception as e: logger.error(f"LLM Error: {e}") diff --git a/src/llm/prompts.py b/src/llm/prompts.py index f546e6c..1096935 100644 --- a/src/llm/prompts.py +++ b/src/llm/prompts.py @@ -28,6 +28,7 @@ CONSTRAINTS: - OUTPUT ONLY VALID JSON. - If no relevant information is found, return empty lists for all keys. - If a character name is not specified (e.g., "Your character"), use "Player Character". +- Do not repeat lore if it is already known; only provide new or updated facts. Strict Output Format: Return a JSON object with exactly these keys: diff --git a/src/pipeline/orchestrator.py b/src/pipeline/orchestrator.py index a26eb50..6023935 100644 --- a/src/pipeline/orchestrator.py +++ b/src/pipeline/orchestrator.py @@ -23,7 +23,7 @@ from src.ui.tui import ConfirmationApp # Configure logging to write to a file instead of stdout logging.basicConfig( - level=logging.INFO, + level=logging.DEBUG, format="%(asctime)s [%(levelname)s] %(name)s: %(message)s", handlers=[ logging.FileHandler("pipeline.log"), @@ -44,7 +44,7 @@ class PipelineOrchestrator: # Modules self.listener = AudioListener(loop=self.loop) - self.transcriber = Transcriber(model_size="base") + self.transcriber = Transcriber(model_size="base", device="cuda") self.processor = LLMProcessor() self.rag_manager = RAGManager() @@ -328,3 +328,16 @@ class PipelineOrchestrator: Stops. """ self.is_running = False + +if __name__ == "__main__": + import asyncio + + async def main(): + loop = asyncio.get_event_loop() + orchestrator = PipelineOrchestrator(loop) + try: + await orchestrator.run() + except KeyboardInterrupt: + orchestrator.stop() + + asyncio.run(main()) diff --git a/src/ui/tui.py b/src/ui/tui.py index 4222a6e..fa4f0a2 100644 --- a/src/ui/tui.py +++ b/src/ui/tui.py @@ -16,7 +16,7 @@ from textual.widgets import ( Static, ) -from src.llm.models import CharacterStateUpdate, ExtractionResult, LoreUpdate +from src.llm.models import CharacterStateUpdate, ContextUpdate, ExtractionResult, LoreUpdate from src.persistence.characters import update_character_state from src.persistence.lore import update_lore @@ -218,8 +218,6 @@ class ConfirmationApp(App): elif isinstance(update, ContextUpdate): display_text = f"Query: {update.query}\nSource: {update.source}\n\n{update.snippet}" context_list = self.query_one("#context-pane", ListView) - # ListView.insert takes an *iterable* of ListItems; passing a - # bare ListItem raises TypeError because ListItem is not iterable. # Insert at the top to show most recent first. await context_list.insert(0, [ListItem(Static(display_text))]) if hasattr(self.llm_to_ui_queue, "task_done"):