Improve audio capture and LLM integration

- Implement Silero VAD for dynamic audio chunking
- Add support for Ollama and vLLM backends
- Harden extraction prompts for strict JSON output
- Refactor TUI worker to handle proposals asynchronously
This commit is contained in:
2026-05-26 19:51:48 -07:00
parent 60e170e777
commit 58bab75bb5
11 changed files with 290 additions and 78 deletions
+30 -4
View File
@@ -22,10 +22,34 @@ class LLMProcessor:
:param base_url: OpenAI-compatible base URL (e.g., for vLLM).
:param model: The model to use for processing. If None, the LLM_MODEL environment variable is used, falling back to "gpt-4o" when it is unset.
"""
self.client = OpenAI(
api_key=api_key or os.environ.get("OPENAI_API_KEY"),
base_url=base_url or os.environ.get("OPENAI_BASE_URL"),
)
backend = os.environ.get("LLM_BACKEND", "openai").lower()
if backend == "ollama":
# Ollama's OpenAI-compatible API
final_base_url = base_url or "http://localhost:11434/v1"
final_api_key = api_key or "ollama"
elif backend == "vllm":
# Remote vLLM server
final_base_url = base_url or os.environ.get("OPENAI_BASE_URL")
final_api_key = api_key or os.environ.get("OPENAI_API_KEY")
else: # default to openai
final_base_url = base_url or os.environ.get("OPENAI_BASE_URL")
final_api_key = api_key or os.environ.get("OPENAI_API_KEY")
try:
self.client = OpenAI(
api_key=final_api_key,
base_url=final_base_url,
)
# Placeholder for a connectivity check on local backends (no check is performed yet)
if backend == "ollama":
# Verifying connectivity would require an actual API call, so nothing is checked here;
# reaching this point only means the client object was constructed without error.
pass
except Exception as e:
print(f"Error initializing LLM client for backend {backend}: {e}")
raise
self.model = model or os.environ.get("LLM_MODEL", "gpt-4o")
def _call_llm(
@@ -67,6 +91,7 @@ class LLMProcessor:
print(f"LLM Processor (Extract): Calling extraction for: {filtered_text}")
try:
# Using standard chat.completions.create with JSON mode for better compatibility with vLLM
print("LLM Processor (Extract): Sending request to backend...")
response = self.client.chat.completions.create(
model=self.model,
messages=[
@@ -76,6 +101,7 @@ class LLMProcessor:
response_format={"type": "json_object"},
extra_body={"include_reasoning": False},
)
print("LLM Processor (Extract): Response received from backend.")
import json