diff --git a/services/imajin-adversarial/service/src/models/cloak_model.py b/services/imajin-adversarial/service/src/models/cloak_model.py
index a74e06aa..ca4e2366 100644
--- a/services/imajin-adversarial/service/src/models/cloak_model.py
+++ b/services/imajin-adversarial/service/src/models/cloak_model.py
@@ -158,6 +158,67 @@ class ArcFaceCloakModel:
         perturbed = (result * 255.0).clip(0, 255).astype(np.uint8)
         return perturbed, total_l2, total_linf, faces_processed
 
+    def embed_frame(
+        self,
+        frame_bgr: np.ndarray,
+        face_bboxes: list[tuple[int, int, int, int]],
+    ) -> list[list[float]]:
+        """Extract ArcFace 512-d L2-normalised embeddings for each face crop.
+
+        Each bbox is clamped with ``_clamp_bbox``; a bbox left with zero or
+        negative area is skipped. The result can therefore be shorter than
+        ``face_bboxes`` and does not record which inputs were dropped. An empty
+        bbox list returns an empty list — caller is expected to run SCRFD
+        detection first (or let the /embed/frame route do auto-detection).
+
+        Args:
+            frame_bgr: Full frame in BGR channel order (uint8 or float32).
+                Values are divided by 255 whatever the dtype, so float32
+                input is expected on the 0-255 scale.
+            face_bboxes: Face bounding boxes (x1,y1,x2,y2) in pixel coords.
+
+        Returns:
+            List of 512-d embeddings (each a list[float]), in input order.
+            Length == number of valid bboxes processed (bboxes clamped to
+            zero area are skipped).
+
+        Raises:
+            RuntimeError: If the model or device has not been initialised.
+        """
+        # Raise explicitly rather than assert: asserts are stripped under -O.
+        if not self._initialized or self._torch_model is None or self._device is None:
+            raise RuntimeError("embed_frame() called before the model was initialised")
+
+        if not face_bboxes:
+            return []
+
+        model = self._torch_model
+        device = self._device
+        frame_float = frame_bgr.astype(np.float32) / 255.0
+
+        embeddings: list[list[float]] = []
+
+        for bbox in face_bboxes:
+            x1, y1, x2, y2 = _clamp_bbox(bbox, frame_bgr.shape)
+            if x2 <= x1 or y2 <= y1:
+                continue  # empty after clamping: no embedding is emitted
+
+            crop = frame_float[y1:y2, x1:x2]
+            # BGR → RGB; copy() drops the negative stride torch.from_numpy rejects.
+            crop_rgb = crop[:, :, ::-1].copy()
+
+            # HWC → 1xCxHxW, resize to ARCFACE_INPUT_SIZE, then rescale by x*2-1.
+            x = torch.from_numpy(crop_rgb).permute(2, 0, 1).unsqueeze(0).float().to(device)
+            x_112 = F.interpolate(x, size=ARCFACE_INPUT_SIZE, mode="bilinear", align_corners=False)
+            x_arcface = x_112 * 2.0 - 1.0
+
+            with torch.no_grad():
+                emb = F.normalize(model(x_arcface), dim=-1)
+
+            embeddings.append(emb.squeeze(0).cpu().tolist())
+
+        return embeddings
+
     async def cleanup(self) -> None:
         if self._torch_model is not None:
             del self._torch_model
diff --git a/services/imajin-adversarial/service/src/models/types.py b/services/imajin-adversarial/service/src/models/types.py
index a3f0e002..5befb86f 100644
--- a/services/imajin-adversarial/service/src/models/types.py
+++ b/services/imajin-adversarial/service/src/models/types.py
@@ -74,6 +74,46 @@ class FrameCloakResponse(BaseModel):
     faces_processed: int
 
 
+class FrameEmbedRequest(BaseModel):
+    """Request to extract ArcFace identity embeddings for face regions in a frame.
+
+    The model returns 512-d L2-normalised embeddings from InsightFace
+    w600k_r50 ArcFace — one per face. These can be compared via cosine
+    similarity (dot product of unit vectors) to measure identity distance
+    between clean and protected photos.
+
+    frame_b64: Base64-encoded PNG of the frame (BGR channel order).
+    face_bboxes: Face bounding boxes (x1,y1,x2,y2) in pixel coords.
+                 If empty, SCRFD detection runs automatically.
+    """
+
+    frame_b64: str = Field(..., description="Base64-encoded PNG, BGR channel order")
+    face_bboxes: list[tuple[int, int, int, int]] = Field(
+        default_factory=list,
+        description="Face bounding boxes (x1,y1,x2,y2); empty = auto-detect via SCRFD",
+    )
+
+
+class FrameEmbedResponse(BaseModel):
+    """ArcFace embedding extraction result.
+
+    embeddings: Per-face 512-d L2-normalised embeddings. Compare with
+                cosine similarity = dot product (both unit vectors).
+    face_count: Number of faces processed.
+    model: Model identifier (arcface-w600k-r50).
+    detection_performed: True if auto-detection was run (no bboxes provided).
+    """
+
+    embeddings: list[list[float]] = Field(
+        ..., description="Per-face 512-d L2-normalised embeddings"
+    )
+    face_count: int = Field(..., description="Number of faces embedded")
+    model: str = Field(..., description="ArcFace model identifier")
+    detection_performed: bool = Field(
+        ..., description="True if auto-detection was run (no bboxes provided)"
+    )
+
+
 class FrameEvasionRequest(BaseModel):
     """Request to apply adversarial face-detection evasion to one video frame.