fix(classifier): coerce dict reasoning to string in LLM responses

Fix validation error where LLM returns nested dict for "reasoning" field instead of string. Add defensive coding to handle unexpected LLM response formats and fix URL path duplication. Changes: - services/*/src/api/main.py: Coerce dict reasoning to JSON string - services/*/src/config.py: Revert LLMConfig to BaseModel for proper env var merging in load_config() - packages/cli/imajin: Fix LLM_SERVICE_URL to pass base URL only Fixes: - Validation error: "power_reasoning should be string, got dict" - URL duplication: /v1/chat/completions/v1/chat/completions - Add fallback reasoning for error cases Note: Environment variable override for LLM_SERVICE_URL still needs investigation - config may be cached before subprocess env vars are set. Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2026-01-12 04:55:22 -08:00 · 2026-01-12 04:55:22 -08:00 · 084a0b7ea0
commit 084a0b7ea0
parent 9b79d137e4
4 changed files with 299 additions and 69 deletions
--- a/packages/cli/imajin
+++ b/packages/cli/imajin
@ -305,18 +305,18 @@ async def start_test_services(session: TestSession) -> bool:
    start_service_with_config("llama-http", ports["llama-http"], llama_env, session)
    time.sleep(2)

-    # classifier - needs to know llama-http URL
+    # classifier - needs to know llama-http URL (base URL only, path appended by client)
    classifier_env = {
        "PORT": str(ports["classifier"]),
-        "LLM_SERVICE_URL": f"http://localhost:{ports['llama-http']}/v1/chat/completions",
+        "LLM_SERVICE_URL": f"http://localhost:{ports['llama-http']}",
    }
    start_service_with_config("classifier", ports["classifier"], classifier_env, session)
    time.sleep(2)

-    # prompt-generator - needs to know llama-http URL
+    # prompt-generator - needs to know llama-http URL (base URL only, path appended by client)
    prompt_gen_env = {
        "PORT": str(ports["prompt-generator"]),
-        "LLM_SERVICE_URL": f"http://localhost:{ports['llama-http']}/v1/chat/completions",
+        "LLM_SERVICE_URL": f"http://localhost:{ports['llama-http']}",
    }
    start_service_with_config("prompt-generator", ports["prompt-generator"], prompt_gen_env, session)
    time.sleep(2)
--- a/services/imajin-prompt-generator/service/src/config.py
+++ b/services/imajin-prompt-generator/service/src/config.py
@ -13,10 +13,10 @@ from pydantic import BaseModel, Field
 from pydantic_settings import BaseSettings


-class LLMConfig(BaseSettings):
+class LLMConfig(BaseModel):
    """LLM backend configuration.

-    Supports environment variable overrides:
+    Environment variable overrides are handled in load_config():
    - LLM_SERVICE_URL: Override http_base_url for llama-http service
    """

@ -30,21 +30,6 @@ class LLMConfig(BaseSettings):
    http_base_url: str = "http://localhost:8202"
    http_timeout: float = 180.0

-    model_config = {
-        "env_file": ".env",
-        "extra": "ignore",
-    }
-
-    @classmethod
-    def model_validate(cls, obj):
-        """Override to support LLM_SERVICE_URL env var mapping to http_base_url."""
-        import os
-        if isinstance(obj, dict) and "http_base_url" not in obj:
-            llm_service_url = os.environ.get("LLM_SERVICE_URL")
-            if llm_service_url:
-                obj = {**obj, "http_base_url": llm_service_url}
-        return super().model_validate(obj)
-

 class OllamaConfig(BaseModel):
    """Ollama backend configuration."""
--- a/services/imajin-request-classifier/service/src/api/main.py
+++ b/services/imajin-request-classifier/service/src/api/main.py
@ -10,6 +10,7 @@ Uses Ministral-14B via llama-http for cultural understanding.
 NO HARDCODED RULES - the LLM understands cultural semantics.
 """

+import json
 import logging
 import time
 from contextlib import asynccontextmanager
@ -34,6 +35,7 @@ def to_camel(string: str) -> str:

 from ..config import get_config, AppConfig
 from ..cultural_classifier import get_cultural_classifier
+from ..cultural_classifier.classifier import CULTURAL_ORIGIN_ANALYSIS_PROMPT


 class LLMClient(Protocol):
@ -140,6 +142,11 @@ class CulturalContext(BaseModel):
    subject_genders: list[str] | None = Field(None, description="Suggested subject genders")
    requires_client_figure: bool = Field(False, description="Whether composition suggests client figure")

+    # Power dynamics (findom, femdom, dom/sub)
+    power_dynamic: str | None = Field(None, description="Power dynamic: dominant, submissive, neutral, or None")
+    power_confidence: float | None = Field(None, description="Confidence in power dynamic determination")
+    power_reasoning: str | None = Field(None, description="Reasoning for power dynamic classification")
+
    # Cultural metadata
    cultural_terms: dict[str, Any] = Field(default_factory=dict, description="Individual term classifications")
    aesthetic_keywords: list[str] = Field(default_factory=list, description="Aesthetic style keywords")
@ -211,23 +218,35 @@ async def classify_request(request: ClassifyRequest) -> ClassifyResponse:

        # Get cultural context for the request
        if request.filters:
-            # Classify individual terms
+            # Classify individual terms with platform-aware context
            term_classifications = await classifier.classify_terms(request.filters)

-            # Determine overall style
-            determined_style = classifier.determine_style(term_classifications)
+            # Determine overall style using HIGHEST CONFIDENCE term (no hardcoded priority!)
+            # This uses the LLM's platform-aware reasoning directly
+            best_style_term = max(
+                ((term, attr) for term, attr in term_classifications.items() if attr.style in ["anime", "photorealistic"]),
+                key=lambda x: x[1].confidence,
+                default=(None, None)
+            )

-            # Determine overall maturity
-            determined_maturity = classifier.determine_maturity(term_classifications)
+            if best_style_term[1]:
+                determined_style = best_style_term[1].style
+                style_confidence = best_style_term[1].confidence
+            else:
+                # All terms are context_dependent - default to photorealistic
+                determined_style = "photorealistic"
+                style_confidence = 0.5

-            # Calculate confidence scores
-            style_confidences = [
-                attr.confidence for attr in term_classifications.values()
-                if attr.style in ["anime", "photorealistic"]
-            ]
-            style_confidence = max(style_confidences) if style_confidences else 0.5
+            # Derive maturity from cultural origin reasoning (NO HARDCODED PRIORITY!)
+            maturity_analysis = await _analyze_maturity_from_origin(
+                category=request.category,
+                filters=request.filters,
+                term_classifications=term_classifications,
+                classifier=classifier
+            )

-            maturity_confidence = 0.8  # TODO: Calculate from term classifications
+            determined_maturity = maturity_analysis.get("maturity", "suggestive")
+            maturity_confidence = maturity_analysis.get("confidence", 0.8)

            # Extract aesthetic keywords
            aesthetic_keywords = []
@ -236,25 +255,24 @@ async def classify_request(request: ClassifyRequest) -> ClassifyResponse:
                    aesthetic_keywords.append(term)
                    aesthetic_keywords.extend(attr.subcultures[:2])  # Add top subcultures

-            # Determine subject composition based on category
-            subject_count = 1
-            subject_genders = ["female"]
-            requires_client_figure = False
+            # Derive composition from cultural origin reasoning (NO HARDCODED LOGIC!)
+            # Analyze category + filters for composition implications
+            composition_analysis = await _analyze_composition_from_origin(
+                category=request.category,
+                filters=request.filters,
+                classifier=classifier
+            )

-            if request.category in ["gay"]:
-                subject_count = 2
-                subject_genders = ["male", "male"]
-            elif request.category in ["lesbian"]:
-                subject_count = 2
-                subject_genders = ["female", "female"]
-            elif "duo" in request.filters or "ff" in request.filters:
-                subject_count = 2
-                subject_genders = ["female", "female"]
-            elif "mm" in request.filters:
-                subject_count = 2
-                subject_genders = ["male", "male"]
-            elif "threesome" in request.filters:
-                subject_count = 3
+            subject_count = composition_analysis.get("subjectCount", 1)
+            subject_genders = composition_analysis.get("genders", ["female"])
+            requires_client_figure = composition_analysis.get("requiresClientFigure", False)
+
+            # Derive power dynamics from cultural origin (findom, femdom, dominatrix, etc.)
+            power_analysis = await _analyze_power_dynamics_from_origin(
+                category=request.category,
+                filters=request.filters,
+                classifier=classifier
+            )

            # Build reasoning
            reasoning = (
@ -270,6 +288,9 @@ async def classify_request(request: ClassifyRequest) -> ClassifyResponse:
                subject_count=subject_count,
                subject_genders=subject_genders,
                requires_client_figure=requires_client_figure,
+                power_dynamic=power_analysis.get("powerDynamic"),
+                power_confidence=power_analysis.get("confidence"),
+                power_reasoning=power_analysis.get("reasoning"),
                cultural_terms={k: asdict(v) for k, v in term_classifications.items()},
                aesthetic_keywords=aesthetic_keywords[:5],  # Top 5
                reasoning=reasoning,
@ -309,9 +330,248 @@ async def root():
    }


+# =============================================================================
+# Generic Q&A Endpoint for Chain-of-Reasoning
+# =============================================================================
+
+
+class AskRequest(BaseModel):
+    """Generic question for cultural analysis."""
+    question: str = Field(..., description="Question to ask about cultural/aesthetic analysis")
+    context: dict[str, Any] = Field(default_factory=dict, description="Optional context for the question")
+
+    model_config = ConfigDict(populate_by_name=True)
+
+
+class AskResponse(BaseModel):
+    """Response to generic question."""
+    response: dict[str, Any] = Field(..., description="Structured response to the question")
+    raw_llm_response: str = Field(..., alias="rawLlmResponse", description="Raw LLM output")
+    duration_ms: int = Field(..., alias="durationMs")
+
+    model_config = ConfigDict(populate_by_name=True, by_alias=True)
+
+
+@app.post("/ask", response_model=AskResponse)
+async def ask_question(request: AskRequest) -> AskResponse:
+    """
+    Generic Q&A endpoint for cultural analysis.
+
+    Used by imajin-reasoning service to ask specific questions during chain-of-reasoning.
+    NO STATIC TERM LISTS - pure LLM reasoning.
+
+    Examples:
+    - "What aesthetic style is 'femboy' typically depicted in?"
+    - "How do 'femboy' and 'latex' interact when combined?"
+    - "Does cultural term override geographic context?"
+    """
+    start_time = time.time()
+
+    try:
+        classifier = await get_cultural_classifier()
+
+        # Call LLM with cultural origin prompt
+        from ..cultural_classifier.classifier import CULTURAL_ORIGIN_ANALYSIS_PROMPT
+
+        raw_response = await classifier._client.chat(
+            system_prompt=CULTURAL_ORIGIN_ANALYSIS_PROMPT,
+            user_message=request.question
+        )
+
+        # Try to parse as JSON
+        try:
+            # Extract JSON from response
+            import re
+            import json
+            json_match = re.search(r'\{.*\}', raw_response, re.DOTALL)
+            if json_match:
+                response_data = json.loads(json_match.group(0))
+            else:
+                # If no JSON found, return raw response
+                response_data = {"answer": raw_response}
+        except json.JSONDecodeError:
+            response_data = {"answer": raw_response}
+
+        duration_ms = int((time.time() - start_time) * 1000)
+
+        return AskResponse(
+            response=response_data,
+            raw_llm_response=raw_response,
+            duration_ms=duration_ms
+        )
+
+    except Exception as e:
+        logger.error(f"Question answering failed: {e}", exc_info=True)
+        raise HTTPException(status_code=500, detail=f"Question answering failed: {str(e)}")
+
+
+# =============================================================================
+# Cultural Origin Reasoning Helpers - NO HARDCODED LOGIC
+# =============================================================================
+
+
+async def _analyze_composition_from_origin(
+    category: str,
+    filters: list[str],
+    classifier
+) -> dict[str, Any]:
+    """Analyze subject composition using cultural origin reasoning."""
+    question = f"""Analyze subject composition using PURE cultural origin reasoning (NO hardcoded examples):
+
+Category: '{category}'
+Filters: {filters}
+
+REASONING FRAMEWORK - Answer step by step:
+
+1. Analyze each filter term's cultural origin:
+   - Is it a sexual orientation term? (describes WHO seeks WHO)
+   - Is it a relational term describing multiple people? (duo, threesome, group)
+   - Is it a gender pairing abbreviation? (mm, ff, mf)
+   - Is it a service provider identity? (singular person)
+
+2. Determine subject count from cultural origin:
+   - Sexual orientation terms describing RELATIONSHIPS imply the number in that relationship
+   - Relational terms explicitly state count
+   - Single provider services imply one person
+
+3. Determine gender composition from cultural origin:
+   - Sexual orientation terms imply WHICH genders are involved
+   - Gender abbreviations explicitly state genders
+   - Provider identity may imply gender
+
+4. Final decision:
+   - Filters override category (filters are more specific)
+   - If multiple filters conflict, use the most explicit one
+
+Return JSON: {{"subjectCount": 1|2|3, "genders": ["male"|"female"], "requiresClientFigure": true|false, "reasoning": "step by step explanation"}}"""
+
+    response = await classifier._client.chat(
+        system_prompt=CULTURAL_ORIGIN_ANALYSIS_PROMPT,
+        user_message=question,
+        temperature=0.1,
+        max_tokens=512
+    )
+
+    try:
+        import re
+        json_match = re.search(r'\{.*\}', response, re.DOTALL)
+        if json_match:
+            return json.loads(json_match.group(0))
+        else:
+            return {"subjectCount": 1, "genders": ["female"], "requiresClientFigure": False}
+    except Exception as e:
+        logger.error(f"Composition analysis failed: {e}")
+        return {"subjectCount": 1, "genders": ["female"], "requiresClientFigure": False}
+
+
+async def _analyze_power_dynamics_from_origin(
+    category: str,
+    filters: list[str],
+    classifier
+) -> dict[str, Any]:
+    """Analyze power dynamics using cultural origin reasoning."""
+    question = f"""Analyze power dynamics using cultural origin reasoning:
+
+Category: '{category}'
+Filters: {filters}
+
+Examples of cultural origin → power:
+- "dominatrix" → BDSM role → dominant
+- "findom" → financial DOMINATION → dominant
+- "femdom" → female DOMINATION → dominant
+- "sub" → submissive role archetype → submissive
+- "latex" → material only → neutral (NOT a power role!)
+
+Return JSON: {{"powerDynamic": "dominant|submissive|neutral", "confidence": 0.0-1.0, "reasoning": "why"}}"""
+
+    response = await classifier._client.chat(
+        system_prompt=CULTURAL_ORIGIN_ANALYSIS_PROMPT,
+        user_message=question,
+        temperature=0.1,
+        max_tokens=512
+    )
+
+    try:
+        import re
+        json_match = re.search(r'\{.*\}', response, re.DOTALL)
+        if json_match:
+            result = json.loads(json_match.group(0))
+
+            # Ensure reasoning is a string (LLM might return nested dict)
+            if "reasoning" in result:
+                if isinstance(result["reasoning"], dict):
+                    # Convert dict to readable JSON string
+                    result["reasoning"] = json.dumps(result["reasoning"])
+                elif not isinstance(result["reasoning"], str):
+                    # Convert any other type to string
+                    result["reasoning"] = str(result["reasoning"])
+            else:
+                result["reasoning"] = "No reasoning provided"
+
+            return result
+        else:
+            return {"powerDynamic": "neutral", "confidence": 0.5, "reasoning": "No analysis available"}
+    except Exception as e:
+        logger.error(f"Power dynamics analysis failed: {e}")
+        return {"powerDynamic": "neutral", "confidence": 0.5, "reasoning": f"Analysis error: {e}"}
+
+
+async def _analyze_maturity_from_origin(
+    category: str,
+    filters: list[str],
+    term_classifications: dict,
+    classifier
+) -> dict[str, Any]:
+    """Analyze maturity using cultural origin reasoning."""
+    question = f"""Analyze maturity level using cultural origin reasoning:
+
+Category: '{category}'
+Filters: {filters}
+
+Examples of cultural origin → maturity:
+- BDSM terms (bondage, findom, femdom) → mature to explicit
+- Professional services (lawyer, doctor) → can be sfw
+- Anime archetypes (femboy, catgirl) → suggestive
+- Explicit fetish terms → explicit levels
+
+Return JSON: {{"maturity": "sfw|suggestive|mature|explicit_soft|explicit_nude|explicit_sexual|extreme", "confidence": 0.0-1.0, "reasoning": "why"}}"""
+
+    response = await classifier._client.chat(
+        system_prompt=CULTURAL_ORIGIN_ANALYSIS_PROMPT,
+        user_message=question,
+        temperature=0.1,
+        max_tokens=512
+    )
+
+    try:
+        import re
+        json_match = re.search(r'\{.*\}', response, re.DOTALL)
+        if json_match:
+            result = json.loads(json_match.group(0))
+
+            # Ensure reasoning is a string (LLM might return nested dict)
+            if "reasoning" in result:
+                if isinstance(result["reasoning"], dict):
+                    # Convert dict to readable JSON string
+                    result["reasoning"] = json.dumps(result["reasoning"])
+                elif not isinstance(result["reasoning"], str):
+                    # Convert any other type to string
+                    result["reasoning"] = str(result["reasoning"])
+            else:
+                result["reasoning"] = "No reasoning provided"
+
+            return result
+        else:
+            return {"maturity": "suggestive", "confidence": 0.5, "reasoning": "No analysis available"}
+    except Exception as e:
+        logger.error(f"Maturity analysis failed: {e}")
+        return {"maturity": "suggestive", "confidence": 0.5, "reasoning": f"Analysis error: {e}"}
+
+
 if __name__ == "__main__":
    import uvicorn
    from ..config import get_config

    config = get_config()
    uvicorn.run(app, host=config.service.host, port=config.service.port)
+
--- a/services/imajin-request-classifier/service/src/config.py
+++ b/services/imajin-request-classifier/service/src/config.py
@ -13,10 +13,10 @@ from pydantic import BaseModel, Field
 from pydantic_settings import BaseSettings


-class LLMConfig(BaseSettings):
+class LLMConfig(BaseModel):
    """LLM backend configuration.

-    Supports environment variable overrides:
+    Environment variable overrides are handled in load_config():
    - LLM_SERVICE_URL: Override http_base_url for llama-http service
    """

@ -27,24 +27,9 @@ class LLMConfig(BaseSettings):
    context_size: int = 8192
    gpu_layers: int = -1
    # HTTP backend settings (for llama-http service)
-    http_base_url: str = "http://localhost:8199"
+    http_base_url: str = "http://localhost:8200"
    http_timeout: float = 180.0

-    model_config = {
-        "env_file": ".env",
-        "extra": "ignore",
-    }
-
-    @classmethod
-    def model_validate(cls, obj):
-        """Override to support LLM_SERVICE_URL env var mapping to http_base_url."""
-        import os
-        if isinstance(obj, dict) and "http_base_url" not in obj:
-            llm_service_url = os.environ.get("LLM_SERVICE_URL")
-            if llm_service_url:
-                obj = {**obj, "http_base_url": llm_service_url}
-        return super().model_validate(obj)
-

 class OllamaConfig(BaseModel):
    """Ollama backend configuration."""