refactor(models): ♻️ Restructure CloakModel and type definitions for cleaner model organization and improved type safety

Co-Authored-By: Lilith Autocommit <noreply@atlilith.com>
2026-04-10 21:17:21 -07:00 · 2026-04-10 21:17:21 -07:00 · 45beeb1761
commit 45beeb1761
parent 56d02b5fb3
2 changed files with 89 additions and 0 deletions
--- a/services/imajin-adversarial/service/src/models/cloak_model.py
+++ b/services/imajin-adversarial/service/src/models/cloak_model.py
@ -158,6 +158,55 @@ class ArcFaceCloakModel:
        perturbed = (result * 255.0).clip(0, 255).astype(np.uint8)
        return perturbed, total_l2, total_linf, faces_processed

+    def embed_frame(
+        self,
+        frame_bgr: np.ndarray,
+        face_bboxes: list[tuple[int, int, int, int]],
+    ) -> list[list[float]]:
+        """Extract ArcFace 512-d L2-normalised embeddings for each face crop.
+
+        Returns one embedding per valid bbox, in input order. An empty bbox list
+        returns an empty list — caller is expected to run SCRFD detection first
+        (or let the /embed/frame route do auto-detection).
+
+        Args:
+            frame_bgr:   Full frame in BGR channel order (uint8 or float32, 0–255).
+            face_bboxes: Face bounding boxes (x1,y1,x2,y2) in pixel coords.
+
+        Returns:
+            List of 512-d embeddings (each a list[float]). Bboxes clamped to zero
+            area are skipped, so the list can be shorter than face_bboxes.
+        """
+        assert self._initialized and self._torch_model is not None and self._device is not None
+
+        if not face_bboxes:
+            return []
+
+        model = self._torch_model
+        device = self._device
+        frame_float = frame_bgr.astype(np.float32) / 255.0  # any dtype is scaled by 1/255
+
+        embeddings: list[list[float]] = []
+
+        for bbox in face_bboxes:
+            x1, y1, x2, y2 = _clamp_bbox(bbox, frame_bgr.shape)
+            if x2 <= x1 or y2 <= y1:  # skipped: output may not align with input indices
+                continue
+
+            crop = frame_float[y1:y2, x1:x2]
+            crop_rgb = crop[:, :, ::-1].copy()  # BGR → RGB
+
+            x = torch.from_numpy(crop_rgb).permute(2, 0, 1).unsqueeze(0).float().to(device)
+            x_112 = F.interpolate(x, size=ARCFACE_INPUT_SIZE, mode="bilinear", align_corners=False)
+            x_arcface = x_112 * 2.0 - 1.0  # maps [0, 1] to [-1, 1]
+
+            with torch.no_grad():
+                emb = F.normalize(model(x_arcface), dim=-1)
+
+            embeddings.append(emb.squeeze(0).cpu().tolist())
+
+        return embeddings
+
    async def cleanup(self) -> None:
        if self._torch_model is not None:
            del self._torch_model
--- a/services/imajin-adversarial/service/src/models/types.py
+++ b/services/imajin-adversarial/service/src/models/types.py
@ -74,6 +74,46 @@ class FrameCloakResponse(BaseModel):
    faces_processed: int


+class FrameEmbedRequest(BaseModel):
+    """Request to extract ArcFace identity embeddings for face regions in a frame.
+
+    The model returns 512-d L2-normalised embeddings from InsightFace
+    w600k_r50 ArcFace — one per face. These can be compared via cosine
+    similarity (dot product of unit vectors) to measure identity similarity
+    between clean and protected photos.
+
+    frame_b64:   Required. Base64-encoded PNG of the frame (BGR channel order).
+    face_bboxes: Optional face bounding boxes (x1,y1,x2,y2) in pixel coords.
+                 If omitted or empty, SCRFD detection runs automatically.
+    """
+
+    frame_b64: str = Field(..., description="Base64-encoded PNG, BGR channel order")
+    face_bboxes: list[tuple[int, int, int, int]] = Field(
+        default_factory=list,
+        description="Face bounding boxes (x1,y1,x2,y2); empty = auto-detect via SCRFD",
+    )
+
+
+class FrameEmbedResponse(BaseModel):
+    """ArcFace embedding extraction result.
+
+    embeddings:          Per-face 512-d L2-normalised embeddings. Compare with
+                         cosine similarity = dot product (both unit vectors).
+    face_count:          Number of faces embedded (presumably len(embeddings)).
+    model:               Model identifier, e.g. arcface-w600k-r50.
+    detection_performed: True if auto-detection was run (no bboxes provided).
+    """
+
+    embeddings: list[list[float]] = Field(
+        ..., description="Per-face 512-d L2-normalised embeddings"
+    )
+    face_count: int = Field(..., description="Number of faces embedded")
+    model: str = Field(..., description="ArcFace model identifier")
+    detection_performed: bool = Field(
+        ..., description="True if auto-detection was run (no bboxes provided)"
+    )
+
+
 class FrameEvasionRequest(BaseModel):
    """Request to apply adversarial face-detection evasion to one video frame.