Add LLM backend support and improve debugging observability
- Add LLM_BACKEND to environment configuration
- Implement detailed debug logging for LLM request/response cycles
- Add missing llama-index dependencies for embeddings and chroma
- Update prompt constraints to prevent lore redundancy
- Enable CUDA for transcription and set logging to DEBUG level
- Add entry point for running the orchestrator directly
- Clean up unused comment in TUI context updates
This commit is contained in:
@@ -2,6 +2,7 @@
|
|||||||
OPENAI_API_KEY=no-key-required
|
OPENAI_API_KEY=no-key-required
|
||||||
OPENAI_BASE_URL=https://vllm.tipsy.codes/v1
|
OPENAI_BASE_URL=https://vllm.tipsy.codes/v1
|
||||||
LLM_MODEL=google/gemma-4-26b-a4b-it
|
LLM_MODEL=google/gemma-4-26b-a4b-it
|
||||||
|
LLM_BACKEND=vllm
|
||||||
#LLM_BACKEND=ollama
|
#LLM_BACKEND=ollama
|
||||||
#LLM_MODEL=gemma:2b
|
#LLM_MODEL=gemma:2b
|
||||||
WHISPER_MODEL=base
|
WHISPER_MODEL=base
|
||||||
|
|||||||
@@ -9,3 +9,5 @@ python-dotenv
|
|||||||
llama-index
|
llama-index
|
||||||
chromadb
|
chromadb
|
||||||
pdfplumber
|
pdfplumber
|
||||||
|
llama-index-embeddings-huggingface
|
||||||
|
llama-index-vector_stores-chroma
|
||||||
|
|||||||
@@ -45,6 +45,7 @@ class LLMProcessor:
|
|||||||
final_base_url = base_url or os.environ.get("OPENAI_BASE_URL")
|
final_base_url = base_url or os.environ.get("OPENAI_BASE_URL")
|
||||||
final_api_key = api_key or os.environ.get("OPENAI_API_KEY")
|
final_api_key = api_key or os.environ.get("OPENAI_API_KEY")
|
||||||
|
|
||||||
|
logger.info(f"Using LLM backend: {backend}")
|
||||||
try:
|
try:
|
||||||
self.client = OpenAI(
|
self.client = OpenAI(
|
||||||
api_key=final_api_key,
|
api_key=final_api_key,
|
||||||
@@ -96,6 +97,14 @@ class LLMProcessor:
|
|||||||
|
|
||||||
messages.append({"role": "user", "content": user_prompt})
|
messages.append({"role": "user", "content": user_prompt})
|
||||||
|
|
||||||
|
# Debugging: Dump inputs
|
||||||
|
logger.debug("--- LLM CALL START ---")
|
||||||
|
logger.debug(f"Model: {self.model}")
|
||||||
|
logger.debug(f"Messages: {messages}")
|
||||||
|
if response_format:
|
||||||
|
logger.debug(f"Response Format: {response_format}")
|
||||||
|
logger.debug("--- LLM CALL END ---")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
response = self.client.chat.completions.create(
|
response = self.client.chat.completions.create(
|
||||||
model=self.model,
|
model=self.model,
|
||||||
@@ -105,6 +114,11 @@ class LLMProcessor:
|
|||||||
)
|
)
|
||||||
content = response.choices[0].message.content
|
content = response.choices[0].message.content
|
||||||
|
|
||||||
|
# Debugging: Dump outputs
|
||||||
|
logger.debug("--- LLM RESPONSE START ---")
|
||||||
|
logger.debug(f"Content: {content}")
|
||||||
|
logger.debug("--- LLM RESPONSE END ---")
|
||||||
|
|
||||||
return self._strip_markdown_code_blocks(content)
|
return self._strip_markdown_code_blocks(content)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"LLM Error: {e}")
|
logger.error(f"LLM Error: {e}")
|
||||||
|
|||||||
@@ -28,6 +28,7 @@ CONSTRAINTS:
|
|||||||
- OUTPUT ONLY VALID JSON.
|
- OUTPUT ONLY VALID JSON.
|
||||||
- If no relevant information is found, return empty lists for all keys.
|
- If no relevant information is found, return empty lists for all keys.
|
||||||
- If a character name is not specified (e.g., "Your character"), use "Player Character".
|
- If a character name is not specified (e.g., "Your character"), use "Player Character".
|
||||||
|
- Do not repeat lore if it is already known; only provide new or updated facts.
|
||||||
|
|
||||||
Strict Output Format:
|
Strict Output Format:
|
||||||
Return a JSON object with exactly these keys:
|
Return a JSON object with exactly these keys:
|
||||||
|
|||||||
@@ -23,7 +23,7 @@ from src.ui.tui import ConfirmationApp
|
|||||||
|
|
||||||
# Configure logging to write to a file instead of stdout
|
# Configure logging to write to a file instead of stdout
|
||||||
logging.basicConfig(
|
logging.basicConfig(
|
||||||
level=logging.INFO,
|
level=logging.DEBUG,
|
||||||
format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
|
format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
|
||||||
handlers=[
|
handlers=[
|
||||||
logging.FileHandler("pipeline.log"),
|
logging.FileHandler("pipeline.log"),
|
||||||
@@ -44,7 +44,7 @@ class PipelineOrchestrator:
|
|||||||
|
|
||||||
# Modules
|
# Modules
|
||||||
self.listener = AudioListener(loop=self.loop)
|
self.listener = AudioListener(loop=self.loop)
|
||||||
self.transcriber = Transcriber(model_size="base")
|
self.transcriber = Transcriber(model_size="base", device="cuda")
|
||||||
self.processor = LLMProcessor()
|
self.processor = LLMProcessor()
|
||||||
self.rag_manager = RAGManager()
|
self.rag_manager = RAGManager()
|
||||||
|
|
||||||
@@ -328,3 +328,16 @@ class PipelineOrchestrator:
|
|||||||
Stops.
|
Stops.
|
||||||
"""
|
"""
|
||||||
self.is_running = False
|
self.is_running = False
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    import asyncio

    async def main():
        """Build the orchestrator on the running loop and drive it until it exits."""
        # Inside a coroutine the running loop is the one asyncio.run() created;
        # get_running_loop() is the supported way to obtain it.
        loop = asyncio.get_running_loop()
        orchestrator = PipelineOrchestrator(loop)
        try:
            await orchestrator.run()
        except (KeyboardInterrupt, asyncio.CancelledError):
            # Under asyncio.run(), Ctrl-C normally reaches this coroutine as a
            # task cancellation rather than a KeyboardInterrupt, so both must
            # be handled for stop() to run. Re-raise so cancellation still
            # propagates to the event loop.
            orchestrator.stop()
            raise

    try:
        asyncio.run(main())
    except KeyboardInterrupt:
        # Ctrl-C is the expected way to end a direct run; exit without a traceback.
        pass
|
||||||
|
|||||||
+1
-3
@@ -16,7 +16,7 @@ from textual.widgets import (
|
|||||||
Static,
|
Static,
|
||||||
)
|
)
|
||||||
|
|
||||||
from src.llm.models import CharacterStateUpdate, ExtractionResult, LoreUpdate
|
from src.llm.models import CharacterStateUpdate, ContextUpdate, ExtractionResult, LoreUpdate
|
||||||
from src.persistence.characters import update_character_state
|
from src.persistence.characters import update_character_state
|
||||||
from src.persistence.lore import update_lore
|
from src.persistence.lore import update_lore
|
||||||
|
|
||||||
@@ -218,8 +218,6 @@ class ConfirmationApp(App):
|
|||||||
elif isinstance(update, ContextUpdate):
|
elif isinstance(update, ContextUpdate):
|
||||||
display_text = f"Query: {update.query}\nSource: {update.source}\n\n{update.snippet}"
|
display_text = f"Query: {update.query}\nSource: {update.source}\n\n{update.snippet}"
|
||||||
context_list = self.query_one("#context-pane", ListView)
|
context_list = self.query_one("#context-pane", ListView)
|
||||||
# ListView.insert takes an *iterable* of ListItems; passing a
|
|
||||||
# bare ListItem raises TypeError because ListItem is not iterable.
|
|
||||||
# Insert at the top to show most recent first.
|
# Insert at the top to show most recent first.
|
||||||
await context_list.insert(0, [ListItem(Static(display_text))])
|
await context_list.insert(0, [ListItem(Static(display_text))])
|
||||||
if hasattr(self.llm_to_ui_queue, "task_done"):
|
if hasattr(self.llm_to_ui_queue, "task_done"):
|
||||||
|
|||||||
Reference in New Issue
Block a user