Source code for facelift.encode

# -*- encoding: utf-8 -*-
# Copyright (c) 2020 Stephen Bunn <stephen@bunn.io>
# ISC License <https://choosealicense.com/licenses/isc>

"""Contains the available builtin face encoders.

The included encoders will handle the necessary steps to take a given frame and
detected face to generate an encoding that can be used for future recognition.
I highly recommend that you use the :class:`~.detect.BasicFaceDetector` if attempting to
encode faces as it is lightweight and other detectors don't provide any added benefit to
face recognition.

Examples:
    >>> from facelift.capture import iter_media_frames
    >>> from facelift.detect import BasicFaceDetector
    >>> from facelift.encode import BasicFaceEncoder
    >>> detector = BasicFaceDetector()
    >>> encoder = BasicFaceEncoder()
    >>> for frame in iter_media_frames(MEDIA_FILEPATH):
    ...     for face in detector.iter_faces(frame):
    ...         face_encoding = encoder.get_encoding(frame, face)


.. important::
    Faces detected from the :class:`~.detect.FullFaceDetector` cannot be encoded as the
    model this detector uses is trained by a third party and not able to be processed by
    ``dlib``'s default ResNet model.
    Please only use faces detected using the :class:`~.detect.BasicFaceDetector` or the
    :class:`~.detect.PartialFaceDetector` for building face encodings.

    I would **highly** recommend that you use the :class:`~.detect.BasicFaceDetector` in
    all cases where you are performing encoding.
    The trained detection model for this basic detector is ~5MB whereas the
    alternative is >90MB.
    Using a heavier model will cause slowdown when simply trying to recognize multiple
    faces in a single frame.

Attributes:
    DEFAULT_ENCODING_JITTER (int):
        The default amount of jitter to apply to produced encodings.
    DEFAULT_ENCODING_PADDING (float):
        The default padding expected to exist around the detected face frame.
"""

import abc
from functools import lru_cache
from math import inf
from pathlib import Path
from typing import List, Optional

import dlib
import numpy

from .constants import DLIB_RESNET_ENCODER_V1_MODEL_NAME, ENCODERS_DIRPATH
from .types import Encoder, Encoding, Face, FaceFeature, Frame

# Default ``jitter`` argument for ``get_encoding``; it is forwarded to the encoder's
# ``compute_face_descriptor`` call as ``num_jitters``.
DEFAULT_ENCODING_JITTER: int = 0
# Default ``padding`` argument for ``get_encoding``; it is forwarded unchanged to the
# encoder's ``compute_face_descriptor`` call as ``padding``.
DEFAULT_ENCODING_PADDING: float = 0.25


@lru_cache()
def get_encoder(model_filepath: Path) -> Encoder:
    """Load the ``dlib`` face recognition model stored at the given path.

    Successful loads are memoized by :func:`functools.lru_cache`, so asking for the
    same path again returns the cached encoder instead of reading the model again.

    Args:
        model_filepath (~pathlib.Path):
            Location of the encoder model on disk.

    Raises:
        FileNotFoundError:
            When ``model_filepath`` does not point to an existing file.

    Returns:
        :class:`~.types.Encoder`:
            The encoder built from the model file.
    """

    if model_filepath.is_file():
        # dlib is handed the path as a POSIX-style string
        return dlib.face_recognition_model_v1(model_filepath.as_posix())

    raise FileNotFoundError(f"No such file {model_filepath!s} exists")
class BaseEncoder(abc.ABC):
    """An abstract encoder class that each encoder should inherit from.

    Subclasses must provide ``model_filepath`` (either as a property or as a plain
    class attribute). Because the property is abstract, a subclass that does not
    override it cannot be instantiated.
    """

    # ``abc.abstractproperty`` is deprecated since Python 3.3; stacking ``property``
    # on top of ``abc.abstractmethod`` is the supported equivalent.
    @property
    @abc.abstractmethod
    def model_filepath(self) -> Path:  # pragma: no cover
        """Property filepath to the encoding model that should be used for encoding.

        Raises:
            NotImplementedError:
                Must be implemented by subclasses
        """

        raise NotImplementedError(
            f"{self.__class__.__qualname__!s} has no associated encoding model"
        )

    def get_encoding(
        self,
        frame: Frame,
        face: Face,
        jitter: int = DEFAULT_ENCODING_JITTER,
        padding: float = DEFAULT_ENCODING_PADDING,
    ) -> Encoding:
        """Calculate the encoding for a given frame and detected face.

        Examples:
            >>> from facelift.capture import iter_media_frames
            >>> from facelift.detect import BasicFaceDetector
            >>> from facelift.encode import BasicFaceEncoder
            >>> detector = BasicFaceDetector()
            >>> encoder = BasicFaceEncoder()
            >>> for frame in iter_media_frames(MEDIA_FILEPATH):
            ...     for face in detector.iter_faces(frame):
            ...         face_encoding = encoder.get_encoding(frame, face)

        Args:
            frame (:attr:`~.types.Frame`):
                The frame the face was detected in
            face (:class:`~.types.Face`):
                The detected face from the given frame
            jitter (int, optional):
                The amount of jitter to apply during encoding.
                This can help provide more accurate encodings for frames containing the
                same face.
                Defaults to :attr:`~.encode.DEFAULT_ENCODING_JITTER`.
            padding (float, optional):
                The amount of padding to apply to the face frame during encoding.
                Defaults to :attr:`~.encode.DEFAULT_ENCODING_PADDING`.

        Returns:
            Encoding:
                The encoding of the provided face for the given frame
        """

        # The encoder is built (and cached) by ``get_encoder`` from this class'
        # ``model_filepath``.
        encoder = get_encoder(self.model_filepath)
        return numpy.array(
            encoder.compute_face_descriptor(
                frame,
                face.raw,
                num_jitters=jitter,
                padding=padding,
            )
        )

    def score_encoding(
        self,
        source_encoding: Encoding,
        known_encodings: List[Encoding],
    ) -> float:
        """Score a source encoding against a list of known encodings.

        .. important::
            This score is the average Euclidean distance between the given encodings.
            So the most similar encodings will result in a score closest to ``0.0``.
            If no encodings are given, then we will default to using :data:`math.inf`
            as it is the greatest distance from ``0.0`` that we can define.

        Examples:
            >>> from facelift.capture import iter_media_frames
            >>> from facelift.detect import BasicFaceDetector
            >>> from facelift.encode import BasicFaceEncoder
            >>> detector = BasicFaceDetector()
            >>> encoder = BasicFaceEncoder()
            >>> # A list of previously encoded faces for a single person
            >>> KNOWN_FACES = [...]
            >>> for frame in iter_media_frames(MEDIA_FILEPATH):
            ...     for face in detector.iter_faces(frame):
            ...         face_encoding = encoder.get_encoding(frame, face)
            ...         score = encoder.score_encoding(face_encoding, KNOWN_FACES)

        Args:
            source_encoding (:attr:`~.types.Encoding`):
                The unknown encoding we are attempting to score.
            known_encodings (List[:attr:`~.types.Encoding`]):
                A list of known encodings we are scoring against.
                These encodings should all be encodings from a single person's face.

        Returns:
            float:
                The score of a given encoding against a list of known encodings.
                This value is greater than or equal to ``0.0`` (lower is better).
        """

        # ``len`` (rather than truthiness) keeps this check valid when the known
        # encodings are passed as a numpy array instead of a list.
        if len(known_encodings) <= 0:
            return inf

        distances = [
            numpy.linalg.norm(known - source_encoding) for known in known_encodings
        ]
        # Convert the numpy scalar so both return paths yield a builtin ``float``.
        return float(numpy.sum(distances) / len(known_encodings))
class BasicFaceEncoder(BaseEncoder):
    """Encoder intended for faces found by the :class:`~.detect.BasicFaceDetector`.

    Faces from either the :class:`~.detect.BasicFaceDetector` or the
    :class:`~.detect.PartialFaceDetector` *can* be encoded by this class.
    For recognition you should still prefer faces coming from the lightest model
    available (:class:`~.detect.BasicFaceDetector`).

    .. important::
        Faces detected using the :class:`~.detect.FullFaceDetector` **can not** be
        encoded by this class.
        When such a face is recognized, :meth:`~.BasicFaceEncoder.get_encoding` raises
        a :class:`ValueError`.
    """

    model_filepath: Path = ENCODERS_DIRPATH.joinpath(DLIB_RESNET_ENCODER_V1_MODEL_NAME)

    def get_encoding(
        self,
        frame: Frame,
        face: Face,
        jitter: int = DEFAULT_ENCODING_JITTER,
        padding: float = DEFAULT_ENCODING_PADDING,
    ) -> numpy.ndarray:
        """Build the encoding of a detected face within its frame.

        Examples:
            >>> from facelift.capture import iter_media_frames
            >>> from facelift.detect import BasicFaceDetector
            >>> from facelift.encode import BasicFaceEncoder
            >>> detector = BasicFaceDetector()
            >>> encoder = BasicFaceEncoder()
            >>> for frame in iter_media_frames(MEDIA_FILEPATH):
            ...     for face in detector.iter_faces(frame):
            ...         face_encoding = encoder.get_encoding(frame, face)

        Args:
            frame (:attr:`~.types.Frame`):
                The frame in which the face was detected.
            face (:class:`~.types.Face`):
                The face that was detected in ``frame``.
            jitter (int, optional):
                How much jitter to apply while encoding.
                Defaults to :attr:`~.encode.DEFAULT_ENCODING_JITTER`.
            padding (float, optional):
                How much padding to apply to the face frame while encoding.
                Defaults to :attr:`~.encode.DEFAULT_ENCODING_PADDING`.

        Raises:
            ValueError:
                When the face's landmarks contain the forehead feature, which is
                treated as a face detected with the
                :class:`~.detect.FullFaceDetector`.

        Returns:
            Encoding:
                The encoding computed for the given face and frame.
        """

        # A forehead landmark is how a face from the full detector is recognized.
        if FaceFeature.FOREHEAD not in face.landmarks:
            return super().get_encoding(frame, face, jitter=jitter, padding=padding)

        encoder_name = self.__class__.__qualname__
        message = (
            f"{encoder_name!r} cannot encode features detected "
            "with the 'FullFaceDetector'"
        )
        raise ValueError(message)