Update UI and prompts
This commit is contained in:
+30
-25
@@ -1,11 +1,13 @@
|
||||
import logging
|
||||
import os
|
||||
from posix import system
|
||||
from this import s
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
from openai import OpenAI
|
||||
from pydantic import ValidationError
|
||||
|
||||
from .models import ExtractionResult
|
||||
from .models import ExtractionResult, FilterResult
|
||||
from .prompts import EXTRACTION_SYSTEM_PROMPT, NOISE_FILTER_SYSTEM_PROMPT
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -90,13 +92,28 @@ class LLMProcessor:
|
||||
logger.error(f"LLM Error: {e}")
|
||||
return ""
|
||||
|
||||
def filter_transcript(
    self, text: str, context: Optional[str] = None
) -> FilterResult:
    """
    Stage 1: Raw Transcript -> Filtered Text.

    Sends ``text`` to the LLM with the noise-filter system prompt, asking
    for a JSON object, and parses the reply into a ``FilterResult``.

    Args:
        text: Raw transcript to filter.
        context: Optional context, forwarded unchanged to ``_call_llm``.

    Returns:
        The ``FilterResult`` built from the reply. If the reply is not
        valid JSON, is valid JSON but not an object, or is rejected by
        ``FilterResult``, a fallback ``FilterResult`` is returned with an
        empty ``contextual_info`` and the raw reply as ``filtered_text``.
    """
    import json

    result = self._call_llm(
        NOISE_FILTER_SYSTEM_PROMPT,
        text,
        context=context,
        response_format={"type": "json_object"},
    )
    logger.info(f"LLM Processor (Filter): {text} -> {result}")

    try:
        data = json.loads(result)
        # json.loads may legally return a list, string, number, etc.;
        # only a JSON object can be expanded into keyword arguments.
        if not isinstance(data, dict):
            raise TypeError(
                f"expected a JSON object, got {type(data).__name__}"
            )
        return FilterResult(**data)
    except (json.JSONDecodeError, TypeError, ValidationError) as e:
        logger.error(f"Filter Parsing Error: {e}")
        # Best effort: keep the raw reply so stage 2 still has text to use.
        return FilterResult(contextual_info="", filtered_text=result)
|
||||
|
||||
def extract_structured_data(
|
||||
self, filtered_text: str, context: Optional[str] = None
|
||||
@@ -109,18 +126,18 @@ class LLMProcessor:
|
||||
# Using standard chat.completions.create with JSON mode for better compatibility with vLLM
|
||||
logger.info("LLM Processor (Extract): Sending request to backend...")
|
||||
|
||||
messages = [
|
||||
{"role": "system", "content": EXTRACTION_SYSTEM_PROMPT},
|
||||
]
|
||||
system_prompt = EXTRACTION_SYSTEM_PROMPT
|
||||
if context:
|
||||
messages.append(
|
||||
{
|
||||
"role": "system",
|
||||
"content": f"Context from previous conversation:\n{context}",
|
||||
}
|
||||
)
|
||||
system_prompt += f"\n{context}"
|
||||
|
||||
messages = [
|
||||
{"role": "system", "content": system_prompt},
|
||||
]
|
||||
messages.append({"role": "user", "content": filtered_text})
|
||||
|
||||
for message in messages:
|
||||
logger.info(f"LLM Processor (Extract): Message: {message}")
|
||||
|
||||
response = self.client.chat.completions.create(
|
||||
model=self.model,
|
||||
messages=messages,
|
||||
@@ -142,15 +159,3 @@ class LLMProcessor:
|
||||
logger.error(f"Extraction Error: {e}")
|
||||
# Return an empty ExtractionResult if parsing fails
|
||||
return ExtractionResult()
|
||||
|
||||
def process_pipeline(
    self, raw_text: str, context: Optional[str] = None
) -> ExtractionResult:
    """
    Executes the two-stage pipeline: Raw Transcript -> Filtered Text -> Structured Data.

    Args:
        raw_text: Raw transcript passed to ``filter_transcript``.
        context: Optional context forwarded to both stages.

    Returns:
        The result of ``extract_structured_data`` on the filtered text, or
        an empty ``ExtractionResult`` when stage 1 yields no text.
    """
    filtered = self.filter_transcript(raw_text, context=context)

    # filter_transcript may hand back either a plain string or an object
    # carrying the text in ``filtered_text``; unwrap it so the emptiness
    # check and stage 2 always operate on the text itself.
    filtered_text = getattr(filtered, "filtered_text", filtered)
    if not filtered_text:
        return ExtractionResult()

    return self.extract_structured_data(filtered_text, context=context)
|
||||
|
||||
Reference in New Issue
Block a user