fix(classifier): coerce dict reasoning to string in LLM responses

Fix validation error where LLM returns nested dict for "reasoning" field
instead of string. Add defensive coding to handle unexpected LLM response
formats and fix URL path duplication.

Changes:
- services/*/src/api/main.py: Coerce dict reasoning to JSON string
- services/*/src/config.py: Revert LLMConfig to BaseModel for proper
  env var merging in load_config()
- packages/cli/imajin: Fix LLM_SERVICE_URL to pass base URL only

Fixes:
- Validation error: "power_reasoning should be string, got dict"
- URL duplication: /v1/chat/completions/v1/chat/completions
- Missing reasoning in error cases: a fallback reasoning string is now returned

Note: Environment variable override for LLM_SERVICE_URL still needs
investigation - config may be cached before subprocess env vars are set.

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
Lilith 2026-01-12 04:55:22 -08:00
parent 9b79d137e4
commit 084a0b7ea0
4 changed files with 299 additions and 69 deletions

View file

@ -305,18 +305,18 @@ async def start_test_services(session: TestSession) -> bool:
start_service_with_config("llama-http", ports["llama-http"], llama_env, session)
time.sleep(2)
# classifier - needs to know llama-http URL
# classifier - needs to know llama-http URL (base URL only, path appended by client)
classifier_env = {
"PORT": str(ports["classifier"]),
"LLM_SERVICE_URL": f"http://localhost:{ports['llama-http']}/v1/chat/completions",
"LLM_SERVICE_URL": f"http://localhost:{ports['llama-http']}",
}
start_service_with_config("classifier", ports["classifier"], classifier_env, session)
time.sleep(2)
# prompt-generator - needs to know llama-http URL
# prompt-generator - needs to know llama-http URL (base URL only, path appended by client)
prompt_gen_env = {
"PORT": str(ports["prompt-generator"]),
"LLM_SERVICE_URL": f"http://localhost:{ports['llama-http']}/v1/chat/completions",
"LLM_SERVICE_URL": f"http://localhost:{ports['llama-http']}",
}
start_service_with_config("prompt-generator", ports["prompt-generator"], prompt_gen_env, session)
time.sleep(2)

View file

@ -13,10 +13,10 @@ from pydantic import BaseModel, Field
from pydantic_settings import BaseSettings
class LLMConfig(BaseSettings):
class LLMConfig(BaseModel):
"""LLM backend configuration.
Supports environment variable overrides:
Environment variable overrides are handled in load_config():
- LLM_SERVICE_URL: Override http_base_url for llama-http service
"""
@ -30,21 +30,6 @@ class LLMConfig(BaseSettings):
http_base_url: str = "http://localhost:8202"
http_timeout: float = 180.0
model_config = {
"env_file": ".env",
"extra": "ignore",
}
@classmethod
def model_validate(cls, obj):
"""Override to support LLM_SERVICE_URL env var mapping to http_base_url."""
import os
if isinstance(obj, dict) and "http_base_url" not in obj:
llm_service_url = os.environ.get("LLM_SERVICE_URL")
if llm_service_url:
obj = {**obj, "http_base_url": llm_service_url}
return super().model_validate(obj)
class OllamaConfig(BaseModel):
"""Ollama backend configuration."""

View file

@ -10,6 +10,7 @@ Uses Ministral-14B via llama-http for cultural understanding.
NO HARDCODED RULES - the LLM understands cultural semantics.
"""
import json
import logging
import time
from contextlib import asynccontextmanager
@ -34,6 +35,7 @@ def to_camel(string: str) -> str:
from ..config import get_config, AppConfig
from ..cultural_classifier import get_cultural_classifier
from ..cultural_classifier.classifier import CULTURAL_ORIGIN_ANALYSIS_PROMPT
class LLMClient(Protocol):
@ -140,6 +142,11 @@ class CulturalContext(BaseModel):
subject_genders: list[str] | None = Field(None, description="Suggested subject genders")
requires_client_figure: bool = Field(False, description="Whether composition suggests client figure")
# Power dynamics (findom, femdom, dom/sub)
power_dynamic: str | None = Field(None, description="Power dynamic: dominant, submissive, neutral, or None")
power_confidence: float | None = Field(None, description="Confidence in power dynamic determination")
power_reasoning: str | None = Field(None, description="Reasoning for power dynamic classification")
# Cultural metadata
cultural_terms: dict[str, Any] = Field(default_factory=dict, description="Individual term classifications")
aesthetic_keywords: list[str] = Field(default_factory=list, description="Aesthetic style keywords")
@ -211,23 +218,35 @@ async def classify_request(request: ClassifyRequest) -> ClassifyResponse:
# Get cultural context for the request
if request.filters:
# Classify individual terms
# Classify individual terms with platform-aware context
term_classifications = await classifier.classify_terms(request.filters)
# Determine overall style
determined_style = classifier.determine_style(term_classifications)
# Determine overall style using HIGHEST CONFIDENCE term (no hardcoded priority!)
# This uses the LLM's platform-aware reasoning directly
best_style_term = max(
((term, attr) for term, attr in term_classifications.items() if attr.style in ["anime", "photorealistic"]),
key=lambda x: x[1].confidence,
default=(None, None)
)
# Determine overall maturity
determined_maturity = classifier.determine_maturity(term_classifications)
if best_style_term[1]:
determined_style = best_style_term[1].style
style_confidence = best_style_term[1].confidence
else:
# All terms are context_dependent - default to photorealistic
determined_style = "photorealistic"
style_confidence = 0.5
# Calculate confidence scores
style_confidences = [
attr.confidence for attr in term_classifications.values()
if attr.style in ["anime", "photorealistic"]
]
style_confidence = max(style_confidences) if style_confidences else 0.5
# Derive maturity from cultural origin reasoning (NO HARDCODED PRIORITY!)
maturity_analysis = await _analyze_maturity_from_origin(
category=request.category,
filters=request.filters,
term_classifications=term_classifications,
classifier=classifier
)
maturity_confidence = 0.8 # TODO: Calculate from term classifications
determined_maturity = maturity_analysis.get("maturity", "suggestive")
maturity_confidence = maturity_analysis.get("confidence", 0.8)
# Extract aesthetic keywords
aesthetic_keywords = []
@ -236,25 +255,24 @@ async def classify_request(request: ClassifyRequest) -> ClassifyResponse:
aesthetic_keywords.append(term)
aesthetic_keywords.extend(attr.subcultures[:2]) # Add top subcultures
# Determine subject composition based on category
subject_count = 1
subject_genders = ["female"]
requires_client_figure = False
# Derive composition from cultural origin reasoning (NO HARDCODED LOGIC!)
# Analyze category + filters for composition implications
composition_analysis = await _analyze_composition_from_origin(
category=request.category,
filters=request.filters,
classifier=classifier
)
if request.category in ["gay"]:
subject_count = 2
subject_genders = ["male", "male"]
elif request.category in ["lesbian"]:
subject_count = 2
subject_genders = ["female", "female"]
elif "duo" in request.filters or "ff" in request.filters:
subject_count = 2
subject_genders = ["female", "female"]
elif "mm" in request.filters:
subject_count = 2
subject_genders = ["male", "male"]
elif "threesome" in request.filters:
subject_count = 3
subject_count = composition_analysis.get("subjectCount", 1)
subject_genders = composition_analysis.get("genders", ["female"])
requires_client_figure = composition_analysis.get("requiresClientFigure", False)
# Derive power dynamics from cultural origin (findom, femdom, dominatrix, etc.)
power_analysis = await _analyze_power_dynamics_from_origin(
category=request.category,
filters=request.filters,
classifier=classifier
)
# Build reasoning
reasoning = (
@ -270,6 +288,9 @@ async def classify_request(request: ClassifyRequest) -> ClassifyResponse:
subject_count=subject_count,
subject_genders=subject_genders,
requires_client_figure=requires_client_figure,
power_dynamic=power_analysis.get("powerDynamic"),
power_confidence=power_analysis.get("confidence"),
power_reasoning=power_analysis.get("reasoning"),
cultural_terms={k: asdict(v) for k, v in term_classifications.items()},
aesthetic_keywords=aesthetic_keywords[:5], # Top 5
reasoning=reasoning,
@ -309,9 +330,248 @@ async def root():
}
# =============================================================================
# Generic Q&A Endpoint for Chain-of-Reasoning
# =============================================================================
class AskRequest(BaseModel):
    """Generic question for cultural analysis."""

    # Allow population by field name in addition to any alias.
    model_config = ConfigDict(populate_by_name=True)

    # Free-form question text; required.
    question: str = Field(
        ...,
        description="Question to ask about cultural/aesthetic analysis",
    )
    # Arbitrary caller-supplied context; defaults to a fresh empty dict.
    context: dict[str, Any] = Field(
        default_factory=dict,
        description="Optional context for the question",
    )
class AskResponse(BaseModel):
    """Response to generic question."""

    # NOTE(review): `by_alias` does not appear among pydantic's documented
    # ConfigDict keys -- confirm it has the intended effect here (newer
    # pydantic releases offer `serialize_by_alias` for alias-based dumps).
    model_config = ConfigDict(populate_by_name=True, by_alias=True)

    # Parsed JSON object from the LLM, or a wrapper around the raw text.
    response: dict[str, Any] = Field(
        ...,
        description="Structured response to the question",
    )
    # Unmodified LLM output; exposed under the camelCase alias.
    raw_llm_response: str = Field(
        ...,
        alias="rawLlmResponse",
        description="Raw LLM output",
    )
    # Elapsed handling time in milliseconds; exposed under the camelCase alias.
    duration_ms: int = Field(..., alias="durationMs")
def _extract_first_json_object(text: str) -> dict[str, Any] | None:
    """Return the first JSON object embedded in *text*, or None if none parses.

    Scans every ``{`` in order and lets the JSON decoder consume exactly one
    value from that position. Unlike a greedy ``\\{.*\\}`` regex this still
    succeeds when the reply contains trailing prose with braces or more than
    one JSON object.
    """
    decoder = json.JSONDecoder()
    start = text.find("{")
    while start != -1:
        try:
            candidate, _ = decoder.raw_decode(text, start)
        except json.JSONDecodeError:
            candidate = None
        if isinstance(candidate, dict):
            return candidate
        start = text.find("{", start + 1)
    return None


@app.post("/ask", response_model=AskResponse)
async def ask_question(request: AskRequest) -> AskResponse:
    """
    Generic Q&A endpoint for cultural analysis.
    Used by imajin-reasoning service to ask specific questions during chain-of-reasoning.
    NO STATIC TERM LISTS - pure LLM reasoning.
    Examples:
    - "What aesthetic style is 'femboy' typically depicted in?"
    - "How do 'femboy' and 'latex' interact when combined?"
    - "Does cultural term override geographic context?"
    """
    # perf_counter is monotonic, so the reported duration cannot go negative
    # or jump if the wall clock is adjusted mid-request.
    start_time = time.perf_counter()

    try:
        classifier = await get_cultural_classifier()

        # NOTE(review): request.context is accepted by the API but is not
        # forwarded to the LLM here -- confirm whether that is intended.
        raw_response = await classifier._client.chat(
            system_prompt=CULTURAL_ORIGIN_ANALYSIS_PROMPT,
            user_message=request.question
        )

        # Prefer a structured answer; fall back to wrapping the raw text when
        # the reply contains no parseable JSON object.
        response_data = _extract_first_json_object(raw_response)
        if response_data is None:
            response_data = {"answer": raw_response}

        duration_ms = int((time.perf_counter() - start_time) * 1000)

        return AskResponse(
            response=response_data,
            raw_llm_response=raw_response,
            duration_ms=duration_ms
        )
    except Exception as e:
        # Top-level boundary: log with traceback and surface as HTTP 500,
        # keeping the original exception chained for debugging.
        logger.error(f"Question answering failed: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Question answering failed: {str(e)}") from e
# =============================================================================
# Cultural Origin Reasoning Helpers - NO HARDCODED LOGIC
# =============================================================================
async def _analyze_composition_from_origin(
    category: str,
    filters: list[str],
    classifier
) -> dict[str, Any]:
    """Ask the LLM how many subjects, and of which genders, the request implies.

    Args:
        category: Request category, interpolated into the prompt.
        filters: Filter terms, interpolated into the prompt.
        classifier: Object whose ``_client.chat`` coroutine performs the LLM call.

    Returns:
        The parsed LLM JSON object with ``subjectCount`` (int >= 1),
        ``genders`` (list of str) and ``requiresClientFigure`` (bool)
        normalised to those types. When the reply contains no JSON or cannot
        be parsed, the defaults ``1`` / ``["female"]`` / ``False`` are returned.

    Note:
        Errors raised by the LLM call itself are not caught here.
    """
    question = f"""Analyze subject composition using PURE cultural origin reasoning (NO hardcoded examples):
Category: '{category}'
Filters: {filters}
REASONING FRAMEWORK - Answer step by step:
1. Analyze each filter term's cultural origin:
- Is it a sexual orientation term? (describes WHO seeks WHO)
- Is it a relational term describing multiple people? (duo, threesome, group)
- Is it a gender pairing abbreviation? (mm, ff, mf)
- Is it a service provider identity? (singular person)
2. Determine subject count from cultural origin:
- Sexual orientation terms describing RELATIONSHIPS imply the number in that relationship
- Relational terms explicitly state count
- Single provider services imply one person
3. Determine gender composition from cultural origin:
- Sexual orientation terms imply WHICH genders are involved
- Gender abbreviations explicitly state genders
- Provider identity may imply gender
4. Final decision:
- Filters override category (filters are more specific)
- If multiple filters conflict, use the most explicit one
Return JSON: {{"subjectCount": 1|2|3, "genders": ["male"|"female"], "requiresClientFigure": true|false, "reasoning": "step by step explanation"}}"""

    response = await classifier._client.chat(
        system_prompt=CULTURAL_ORIGIN_ANALYSIS_PROMPT,
        user_message=question,
        temperature=0.1,
        max_tokens=512
    )

    try:
        import re
        json_match = re.search(r'\{.*\}', response, re.DOTALL)
        if not json_match:
            return {"subjectCount": 1, "genders": ["female"], "requiresClientFigure": False}
        result = json.loads(json_match.group(0))
    except Exception as e:
        # Best-effort: a malformed reply must not fail the whole classification.
        logger.error(f"Composition analysis failed: {e}")
        return {"subjectCount": 1, "genders": ["female"], "requiresClientFigure": False}

    # The LLM does not always honour the requested schema, so coerce the keys
    # the caller consumes into the types the response model expects.
    raw_count = result.get("subjectCount")
    if isinstance(raw_count, str) and raw_count.strip().isdigit():
        raw_count = int(raw_count.strip())
    elif isinstance(raw_count, float) and raw_count.is_integer():
        raw_count = int(raw_count)
    # bool is a subclass of int, so exclude it explicitly.
    if isinstance(raw_count, bool) or not isinstance(raw_count, int) or raw_count < 1:
        raw_count = 1
    result["subjectCount"] = raw_count

    raw_genders = result.get("genders")
    if isinstance(raw_genders, str):
        result["genders"] = [raw_genders]
    elif isinstance(raw_genders, list):
        result["genders"] = [g for g in raw_genders if isinstance(g, str)]
    else:
        result["genders"] = ["female"]

    raw_flag = result.get("requiresClientFigure")
    if isinstance(raw_flag, str):
        # bool("false") would be True, so compare the text explicitly.
        raw_flag = raw_flag.strip().lower() == "true"
    result["requiresClientFigure"] = raw_flag if isinstance(raw_flag, bool) else False

    return result
async def _analyze_power_dynamics_from_origin(
    category: str,
    filters: list[str],
    classifier
) -> dict[str, Any]:
    """Ask the LLM which power dynamic the category and filters imply.

    Args:
        category: Request category, interpolated into the prompt.
        filters: Filter terms, interpolated into the prompt.
        classifier: Object whose ``_client.chat`` coroutine performs the LLM call.

    Returns:
        The parsed LLM JSON object. ``reasoning`` is always a string,
        ``powerDynamic`` is a string or None, and ``confidence`` (when present
        and not null) is a float. When the reply contains no JSON or cannot be
        parsed, a neutral fallback with confidence 0.5 is returned.

    Note:
        Errors raised by the LLM call itself are not caught here.
    """
    question = f"""Analyze power dynamics using cultural origin reasoning:
Category: '{category}'
Filters: {filters}
Examples of cultural origin power:
- "dominatrix" BDSM role dominant
- "findom" financial DOMINATION dominant
- "femdom" female DOMINATION dominant
- "sub" submissive role archetype submissive
- "latex" material only neutral (NOT a power role!)
Return JSON: {{"powerDynamic": "dominant|submissive|neutral", "confidence": 0.0-1.0, "reasoning": "why"}}"""

    response = await classifier._client.chat(
        system_prompt=CULTURAL_ORIGIN_ANALYSIS_PROMPT,
        user_message=question,
        temperature=0.1,
        max_tokens=512
    )

    try:
        import re
        json_match = re.search(r'\{.*\}', response, re.DOTALL)
        if not json_match:
            return {"powerDynamic": "neutral", "confidence": 0.5, "reasoning": "No analysis available"}
        result = json.loads(json_match.group(0))

        # Ensure reasoning is a string (LLM might return a nested structure).
        reasoning = result.get("reasoning")
        if reasoning is None:
            # Covers both a missing key and an explicit JSON null.
            result["reasoning"] = "No reasoning provided"
        elif isinstance(reasoning, (dict, list)):
            # Serialise containers as JSON rather than a Python repr.
            result["reasoning"] = json.dumps(reasoning)
        elif not isinstance(reasoning, str):
            result["reasoning"] = str(reasoning)

        # The caller stores powerDynamic in a `str | None` field; anything
        # else would fail validation, so fall back to the neutral value.
        dynamic = result.get("powerDynamic")
        if dynamic is not None and not isinstance(dynamic, str):
            result["powerDynamic"] = "neutral"

        # confidence feeds a `float | None` field: keep null/missing as-is,
        # coerce numeric values, and replace anything non-numeric.
        confidence = result.get("confidence")
        if confidence is not None:
            try:
                result["confidence"] = float(confidence)
            except (TypeError, ValueError):
                result["confidence"] = 0.5

        return result
    except Exception as e:
        # Best-effort: a malformed reply must not fail the whole classification.
        logger.error(f"Power dynamics analysis failed: {e}")
        return {"powerDynamic": "neutral", "confidence": 0.5, "reasoning": f"Analysis error: {e}"}
async def _analyze_maturity_from_origin(
    category: str,
    filters: list[str],
    term_classifications: dict,
    classifier
) -> dict[str, Any]:
    """Ask the LLM which maturity level the category and filters imply.

    Args:
        category: Request category, interpolated into the prompt.
        filters: Filter terms, interpolated into the prompt.
        term_classifications: Accepted for interface compatibility; not used
            by this function.
        classifier: Object whose ``_client.chat`` coroutine performs the LLM call.

    Returns:
        The parsed LLM JSON object. ``reasoning`` is always a string, and
        ``maturity`` / ``confidence`` (when present) are a string and a float
        respectively. When the reply contains no JSON or cannot be parsed, a
        ``"suggestive"`` fallback with confidence 0.5 is returned.

    Note:
        Errors raised by the LLM call itself are not caught here.
    """
    question = f"""Analyze maturity level using cultural origin reasoning:
Category: '{category}'
Filters: {filters}
Examples of cultural origin maturity:
- BDSM terms (bondage, findom, femdom) mature to explicit
- Professional services (lawyer, doctor) can be sfw
- Anime archetypes (femboy, catgirl) suggestive
- Explicit fetish terms explicit levels
Return JSON: {{"maturity": "sfw|suggestive|mature|explicit_soft|explicit_nude|explicit_sexual|extreme", "confidence": 0.0-1.0, "reasoning": "why"}}"""

    response = await classifier._client.chat(
        system_prompt=CULTURAL_ORIGIN_ANALYSIS_PROMPT,
        user_message=question,
        temperature=0.1,
        max_tokens=512
    )

    try:
        import re
        json_match = re.search(r'\{.*\}', response, re.DOTALL)
        if not json_match:
            return {"maturity": "suggestive", "confidence": 0.5, "reasoning": "No analysis available"}
        result = json.loads(json_match.group(0))

        # Ensure reasoning is a string (LLM might return a nested structure).
        reasoning = result.get("reasoning")
        if reasoning is None:
            # Covers both a missing key and an explicit JSON null.
            result["reasoning"] = "No reasoning provided"
        elif isinstance(reasoning, (dict, list)):
            # Serialise containers as JSON rather than a Python repr.
            result["reasoning"] = json.dumps(reasoning)
        elif not isinstance(reasoning, str):
            result["reasoning"] = str(reasoning)

        # Only touch keys the LLM actually supplied, so the caller's own
        # `.get(..., default)` handling still applies to missing keys.
        if "maturity" in result and not isinstance(result["maturity"], str):
            result["maturity"] = "suggestive"

        if "confidence" in result:
            try:
                result["confidence"] = float(result["confidence"])
            except (TypeError, ValueError):
                # Non-numeric or null confidence: use the fallback value.
                result["confidence"] = 0.5

        return result
    except Exception as e:
        # Best-effort: a malformed reply must not fail the whole classification.
        logger.error(f"Maturity analysis failed: {e}")
        return {"maturity": "suggestive", "confidence": 0.5, "reasoning": f"Analysis error: {e}"}
if __name__ == "__main__":
    # Script entry point: serve `app` with uvicorn on the host/port taken
    # from the service section of the loaded configuration.
    import uvicorn
    from ..config import get_config

    service_cfg = get_config().service
    uvicorn.run(app, host=service_cfg.host, port=service_cfg.port)

View file

@ -13,10 +13,10 @@ from pydantic import BaseModel, Field
from pydantic_settings import BaseSettings
class LLMConfig(BaseSettings):
class LLMConfig(BaseModel):
"""LLM backend configuration.
Supports environment variable overrides:
Environment variable overrides are handled in load_config():
- LLM_SERVICE_URL: Override http_base_url for llama-http service
"""
@ -27,24 +27,9 @@ class LLMConfig(BaseSettings):
context_size: int = 8192
gpu_layers: int = -1
# HTTP backend settings (for llama-http service)
http_base_url: str = "http://localhost:8199"
http_base_url: str = "http://localhost:8200"
http_timeout: float = 180.0
model_config = {
"env_file": ".env",
"extra": "ignore",
}
@classmethod
def model_validate(cls, obj):
"""Override to support LLM_SERVICE_URL env var mapping to http_base_url."""
import os
if isinstance(obj, dict) and "http_base_url" not in obj:
llm_service_url = os.environ.get("LLM_SERVICE_URL")
if llm_service_url:
obj = {**obj, "http_base_url": llm_service_url}
return super().model_validate(obj)
class OllamaConfig(BaseModel):
"""Ollama backend configuration."""