# Source code for facelift.encode

# -*- encoding: utf-8 -*-
# Copyright (c) 2020 Stephen Bunn <stephen@bunn.io>
# ISC License <https://choosealicense.com/licenses/isc>

"""Contains the available builtin face encoders.

The included encoders will handle the necessary steps to take a given frame and
detected face to generate an encoding that can be used for future recognition.
I highly recommend that you use the :class:`~.detect.BasicFaceDetector` if attempting to
encode faces as it is lightweight and other detectors don't provide any added benefit to
face recognition.

Examples:
    >>> from facelift.capture import iter_media_frames
    >>> from facelift.detect import BasicFaceDetector
    >>> from facelift.encode import BasicFaceEncoder
    >>> detector = BasicFaceDetector()
    >>> encoder = BasicFaceEncoder()
    >>> for frame in iter_media_frames(MEDIA_FILEPATH):
    ...     for face in detector.iter_faces(frame):
    ...         face_encoding = encoder.get_encoding(frame, face)


.. important::
    Faces detected from the :class:`~.detect.FullFaceDetector` cannot be encoded as the
    model this detector uses is trained by a third party and not able to be processed by
    ``dlib``'s default ResNet model.
    Please only use faces detected using the :class:`~.detect.BasicFaceDetector` or the
    :class:`~.detect.PartialFaceDetector` for building face encodings.

    I would **highly** recommend that you use the :class:`~.detect.BasicFaceDetector` in
    all cases where you are performing encoding.
    The trained detection model for this basic detector is ~5MB whereas the
    alternative is >90MB.
    Using a heavier model will cause slowdown when simply trying to recognize multiple
    faces in a single frame.

Attributes:
    DEFAULT_ENCODING_JITTER (int):
        The default amount of jitter to apply to produced encodings.
    DEFAULT_ENCODING_PADDING (float):
        The default padding expected to exist around the detected face frame.
"""

import abc
from functools import lru_cache
from math import inf
from pathlib import Path
from typing import List, Optional

import dlib
import numpy

from .constants import DLIB_RESNET_ENCODER_V1_MODEL_NAME, ENCODERS_DIRPATH
from .types import Encoder, Encoding, Face, FaceFeature, Frame

# Default ``jitter`` used by the encoders' ``get_encoding`` methods; it is forwarded
# to the underlying encoder as ``num_jitters``.
DEFAULT_ENCODING_JITTER: int = 0
# Default ``padding`` used by the encoders' ``get_encoding`` methods; it is forwarded
# to the underlying encoder as ``padding``.
DEFAULT_ENCODING_PADDING: float = 0.25


@lru_cache()
def get_encoder(model_filepath: Path) -> Encoder:
    """Build an encoder for the given ``dlib`` ResNet model.

    The result is memoized with :func:`functools.lru_cache`, keyed on the given
    ``model_filepath``, so repeated calls with an equal path reuse the encoder that
    was already built instead of loading the model again.
    A call that raises is not cached, so a missing model file is checked again on the
    next call.

    Args:
        model_filepath (~pathlib.Path):
            The path to the encoder model

    Raises:
        FileNotFoundError: If the given model filepath does not exist

    Returns:
        :class:`~.types.Encoder`: The encoder to use for encoding face frames
    """

    # Fail early with a clear message rather than letting dlib report the problem
    if not model_filepath.is_file():
        raise FileNotFoundError(f"No such file {model_filepath!s} exists")

    # The model path is handed to dlib as a POSIX-style string
    return dlib.face_recognition_model_v1(model_filepath.as_posix())


class BaseEncoder(abc.ABC):
    """An abstract encoder class that each encoder should inherit from.

    Subclasses must provide a ``model_filepath`` (either as a property or as a plain
    class attribute) pointing to the model that :func:`get_encoder` should load.

    Raises:
        NotImplementedError: If the ``model_filepath`` property is not implemented
    """

    # ``abc.abstractproperty`` is deprecated; stacking ``property`` on top of
    # ``abc.abstractmethod`` is the supported equivalent.
    @property
    @abc.abstractmethod
    def model_filepath(self) -> Path:  # pragma: no cover
        """Property filepath to the encoding model that should be used for encoding.

        Raises:
            NotImplementedError: Must be implemented by subclasses
        """

        raise NotImplementedError(
            f"{self.__class__.__qualname__!s} has no associated encoding model"
        )

    def get_encoding(
        self,
        frame: Frame,
        face: Face,
        jitter: int = DEFAULT_ENCODING_JITTER,
        padding: float = DEFAULT_ENCODING_PADDING,
    ) -> Encoding:
        """Calculate the encoding for a given frame and detected face.

        Examples:
            >>> from facelift.capture import iter_media_frames
            >>> from facelift.detect import BasicFaceDetector
            >>> from facelift.encode import BasicFaceEncoder
            >>> detector = BasicFaceDetector()
            >>> encoder = BasicFaceEncoder()
            >>> for frame in iter_media_frames(MEDIA_FILEPATH):
            ...     for face in detector.iter_faces(frame):
            ...         face_encoding = encoder.get_encoding(frame, face)

        Args:
            frame (:attr:`~.types.Frame`):
                The frame the face was detected in
            face (:class:`~.types.Face`):
                The detected face from the given frame
            jitter (int, optional):
                The amount of jitter to apply during encoding.
                This can help provide more accurate encodings for frames containing
                the same face.
                Defaults to :attr:`~.encode.DEFAULT_ENCODING_JITTER`.
            padding (float, optional):
                The amount of padding to apply to the face frame during encoding.
                Defaults to :attr:`~.encode.DEFAULT_ENCODING_PADDING`.

        Raises:
            FileNotFoundError: If the encoder's ``model_filepath`` does not exist

        Returns:
            Encoding: The encoding of the provided face for the given frame
        """

        # The encoder is cached by ``get_encoder`` so this lookup is cheap after the
        # first call for a given model path.
        encoder = get_encoder(self.model_filepath)
        descriptor = encoder.compute_face_descriptor(
            frame,
            face.raw,
            num_jitters=jitter,
            padding=padding,
        )
        return numpy.array(descriptor)

    def score_encoding(
        self,
        source_encoding: Encoding,
        known_encodings: List[Encoding],
    ) -> float:
        """Score a source encoding against a list of known encodings.

        .. important::
            This score is the average Euclidean distance between the given encodings.
            So the most similar encodings will result in a score closest to ``0.0``.

            If no encodings are given, then we will default to using :data:`math.inf`
            as it is the greatest distance from ``0.0`` that we can define.

        Examples:
            >>> from facelift.capture import iter_media_frames
            >>> from facelift.detect import BasicFaceDetector
            >>> from facelift.encode import BasicFaceEncoder
            >>> detector = BasicFaceDetector()
            >>> encoder = BasicFaceEncoder()
            >>> # A list of previously encoded faces for a single person
            >>> KNOWN_FACES = [...]
            >>> for frame in iter_media_frames(MEDIA_FILEPATH):
            ...     for face in detector.iter_faces(frame):
            ...         face_encoding = encoder.get_encoding(frame, face)
            ...         score = encoder.score_encoding(face_encoding, KNOWN_FACES)

        Args:
            source_encoding (:attr:`~.types.Encoding`):
                The unknown encoding we are attempting to score.
            known_encodings (List[:attr:`~.types.Encoding`]):
                A list of known encodings we are scoring against.
                These encodings should all be encodings of a single person's face.

        Returns:
            float:
                The score of a given encoding against a list of known encodings.
                This value is always greater than or equal to 0.0 (lower is better).
        """

        # Use ``len`` instead of truthiness so that array-like inputs, whose truth
        # value is ambiguous, are handled as well.
        if len(known_encodings) == 0:
            return inf

        distances = [
            numpy.linalg.norm(known - source_encoding) for known in known_encodings
        ]
        # Convert the numpy scalar so the result matches the annotated return type
        return float(numpy.sum(distances) / len(known_encodings))


class BasicFaceEncoder(BaseEncoder):
    """Encode faces detected by the :class:`~.detect.BasicFaceDetector`.

    This face encoder *can* handle faces detected by both the
    :class:`~.detect.BasicFaceDetector` and the :class:`~.detect.PartialFaceDetector`.
    However, you should likely only ever be encoding faces for recognition from the
    lightest model available (:class:`~.detect.BasicFaceDetector`).

    .. important::
        This encoder **can not** handle faces detected using the
        :class:`~.detect.FullFaceDetector`.
        If we determine we are using a face detected by this detector, the
        :meth:`~.BasicFaceEncoder.get_encoding` method will raise a :class:`ValueError`.
    """

    # Satisfies the abstract ``model_filepath`` declared on ``BaseEncoder``
    model_filepath: Path = ENCODERS_DIRPATH.joinpath(DLIB_RESNET_ENCODER_V1_MODEL_NAME)

    def get_encoding(
        self,
        frame: Frame,
        face: Face,
        jitter: int = DEFAULT_ENCODING_JITTER,
        padding: float = DEFAULT_ENCODING_PADDING,
    ) -> Encoding:
        """Calculate the encoding for a given frame and detected face.

        Examples:
            >>> from facelift.capture import iter_media_frames
            >>> from facelift.detect import BasicFaceDetector
            >>> from facelift.encode import BasicFaceEncoder
            >>> detector = BasicFaceDetector()
            >>> encoder = BasicFaceEncoder()
            >>> for frame in iter_media_frames(MEDIA_FILEPATH):
            ...     for face in detector.iter_faces(frame):
            ...         face_encoding = encoder.get_encoding(frame, face)

        Args:
            frame (:attr:`~.types.Frame`):
                The frame the face was detected in
            face (:class:`~.types.Face`):
                The detected face from the given frame
            jitter (int, optional):
                The amount of jitter to apply during encoding.
                This can help provide more accurate encodings for frames containing
                the same face.
                Defaults to :attr:`~.encode.DEFAULT_ENCODING_JITTER`.
            padding (float, optional):
                The amount of padding to apply to the face frame during encoding.
                Defaults to :attr:`~.encode.DEFAULT_ENCODING_PADDING`.

        Raises:
            ValueError:
                When the given face was detected with the
                :class:`~.detect.FullFaceDetector`.

        Returns:
            Encoding: The encoding of the provided face for the given frame
        """

        # A forehead landmark is treated as the marker of a face produced by the
        # ``FullFaceDetector``, which this encoder refuses to process.
        if FaceFeature.FOREHEAD in face.landmarks:
            raise ValueError(
                f"{self.__class__.__qualname__!r} cannot encode features detected "
                "with the 'FullFaceDetector'"
            )

        return super().get_encoding(frame, face, jitter=jitter, padding=padding)