refactor(models): ♻️ Restructure CloakModel and type definitions for cleaner model organization and improved type safety

Co-Authored-By: Lilith Autocommit <noreply@atlilith.com>
This commit is contained in:
Claude Code 2026-04-10 21:17:21 -07:00
parent 56d02b5fb3
commit 45beeb1761
2 changed files with 89 additions and 0 deletions

View file

@ -158,6 +158,55 @@ class ArcFaceCloakModel:
perturbed = (result * 255.0).clip(0, 255).astype(np.uint8)
return perturbed, total_l2, total_linf, faces_processed
def embed_frame(
    self,
    frame_bgr: np.ndarray,
    face_bboxes: list[tuple[int, int, int, int]],
) -> list[list[float]]:
    """Extract ArcFace 512-d L2-normalised embeddings for each face crop.

    One embedding is produced per usable bbox, in input order. An empty
    bbox list returns an empty list: the caller is expected to run SCRFD
    detection first (or let the /embed/frame route do auto-detection).

    Args:
        frame_bgr: Full frame in BGR channel order, shape (H, W, 3).
            NOTE(review): pixel values are always divided by 255, so a
            float frame is presumably expected in the 0-255 range - confirm.
        face_bboxes: Face bounding boxes (x1,y1,x2,y2) in pixel coords.

    Returns:
        List of embeddings (each a list[float]). Length == number of valid
        bboxes processed (bboxes clamped to zero area are skipped).

    Raises:
        ValueError: If bboxes are given but ``frame_bgr`` is not (H, W, 3).
    """
    assert self._initialized and self._torch_model is not None and self._device is not None

    if not face_bboxes:
        return []

    # Fail early with a clear message instead of an IndexError on the
    # channel flip or a shape mismatch deep inside the model.
    if frame_bgr.ndim != 3 or frame_bgr.shape[2] != 3:
        raise ValueError(
            f"frame_bgr must have shape (H, W, 3); got {tuple(frame_bgr.shape)}"
        )

    model = self._torch_model
    device = self._device
    embeddings: list[list[float]] = []

    # Nothing here needs gradients, so disable autograd for the whole loop.
    with torch.no_grad():
        for bbox in face_bboxes:
            x1, y1, x2, y2 = _clamp_bbox(bbox, frame_bgr.shape)
            if x2 <= x1 or y2 <= y1:
                # Bbox collapsed to zero area after clamping; skip it.
                continue

            # Convert only the crop (not the whole frame) to float32 in [0, 1].
            # The channel flip is BGR -> RGB; ascontiguousarray materialises
            # the negative-stride view, which torch.from_numpy cannot wrap.
            crop_rgb = (
                np.ascontiguousarray(frame_bgr[y1:y2, x1:x2, ::-1], dtype=np.float32)
                / 255.0
            )

            # HWC -> 1xCxHxW, then resize to the ArcFace input resolution.
            x = torch.from_numpy(crop_rgb).permute(2, 0, 1).unsqueeze(0).float().to(device)
            x_112 = F.interpolate(
                x, size=ARCFACE_INPUT_SIZE, mode="bilinear", align_corners=False
            )

            # Rescale [0, 1] -> [-1, 1] before the forward pass.
            x_arcface = x_112 * 2.0 - 1.0

            emb = F.normalize(model(x_arcface), dim=-1)
            embeddings.append(emb.squeeze(0).cpu().tolist())

    return embeddings
async def cleanup(self) -> None:
if self._torch_model is not None:
del self._torch_model

View file

@ -74,6 +74,46 @@ class FrameCloakResponse(BaseModel):
faces_processed: int
class FrameEmbedRequest(BaseModel):
    """Payload asking for ArcFace identity embeddings of the faces in a frame.

    Each face yields one 512-d L2-normalised embedding from the InsightFace
    w600k_r50 ArcFace model. Because the vectors are unit length, their dot
    product equals their cosine similarity, which can be used to measure
    identity distance between clean and protected photos.

    Attributes:
        frame_b64: Base64-encoded PNG of the frame (BGR channel order).
        face_bboxes: Face bounding boxes (x1,y1,x2,y2) in pixel coords.
            If empty, SCRFD detection runs automatically.
    """

    # Required: the encoded frame itself.
    frame_b64: str = Field(
        ...,
        description="Base64-encoded PNG, BGR channel order",
    )

    # Optional: defaults to a fresh empty list, which requests auto-detection.
    face_bboxes: list[tuple[int, int, int, int]] = Field(
        description="Face bounding boxes (x1,y1,x2,y2); empty = auto-detect via SCRFD",
        default_factory=list,
    )
class FrameEmbedResponse(BaseModel):
    """Result of an ArcFace embedding extraction.

    Attributes:
        embeddings: Per-face 512-d L2-normalised embeddings. Both operands
            being unit vectors, cosine similarity is just their dot product.
        face_count: Number of faces processed.
        model: Model identifier (arcface-w600k-r50).
        detection_performed: True if auto-detection was run (no bboxes
            provided).
    """

    # Every field is required; the `...` default marks it as such.
    embeddings: list[list[float]] = Field(
        ...,
        description="Per-face 512-d L2-normalised embeddings",
    )
    face_count: int = Field(
        ...,
        description="Number of faces embedded",
    )
    model: str = Field(
        ...,
        description="ArcFace model identifier",
    )
    detection_performed: bool = Field(
        ...,
        description="True if auto-detection was run (no bboxes provided)",
    )
class FrameEvasionRequest(BaseModel):
"""Request to apply adversarial face-detection evasion to one video frame.