# Copyright 2015-2016 Carnegie Mellon University
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Module for dlib-based alignment."""

# NOTE: This file has been copied from the openface project.
# https://github.com/cmusatyalab/openface/blob/master/openface/align_dlib.py

import cv2
import dlib
import numpy as np

# Reference positions for the 68 facial landmarks, one (x, y) pair per
# landmark. The rows are indexed by the same landmark indices that
# AlignDlib.align() uses to pick its three anchor points.
# NOTE(review): presumably the mean landmark layout of a normalized face
# (see "dlib-landmark-mean.png" in the class docs) -- confirm upstream.
TEMPLATE = np.float32([
    (0.0792396913815, 0.339223741112), (0.0829219487236, 0.456955367943),
    (0.0967927109165, 0.575648016728), (0.122141515615, 0.691921601066),
    (0.168687863544, 0.800341263616), (0.239789390707, 0.895732504778),
    (0.325662452515, 0.977068762493), (0.422318282013, 1.04329000149),
    (0.531777802068, 1.06080371126), (0.641296298053, 1.03981924107),
    (0.738105872266, 0.972268833998), (0.824444363295, 0.889624082279),
    (0.894792677532, 0.792494155836), (0.939395486253, 0.681546643421),
    (0.96111933829, 0.562238253072), (0.970579841181, 0.441758925744),
    (0.971193274221, 0.322118743967), (0.163846223133, 0.249151738053),
    (0.21780354657, 0.204255863861), (0.291299351124, 0.192367318323),
    (0.367460241458, 0.203582210627), (0.4392945113, 0.233135599851),
    (0.586445962425, 0.228141644834), (0.660152671635, 0.195923841854),
    (0.737466449096, 0.182360984545), (0.813236546239, 0.192828009114),
    (0.8707571886, 0.235293377042), (0.51534533827, 0.31863546193),
    (0.516221448289, 0.396200446263), (0.517118861835, 0.473797687758),
    (0.51816430343, 0.553157797772), (0.433701156035, 0.604054457668),
    (0.475501237769, 0.62076344024), (0.520712933176, 0.634268222208),
    (0.565874114041, 0.618796581487), (0.607054002672, 0.60157671656),
    (0.252418718401, 0.331052263829), (0.298663015648, 0.302646354002),
    (0.355749724218, 0.303020650651), (0.403718978315, 0.33867711083),
    (0.352507175597, 0.349987615384), (0.296791759886, 0.350478978225),
    (0.631326076346, 0.334136672344), (0.679073381078, 0.29645404267),
    (0.73597236153, 0.294721285802), (0.782865376271, 0.321305281656),
    (0.740312274764, 0.341849376713), (0.68499850091, 0.343734332172),
    (0.353167761422, 0.746189164237), (0.414587777921, 0.719053835073),
    (0.477677654595, 0.706835892494), (0.522732900812, 0.717092275768),
    (0.569832064287, 0.705414478982), (0.635195811927, 0.71565572516),
    (0.69951672331, 0.739419187253), (0.639447159575, 0.805236879972),
    (0.576410514055, 0.835436670169), (0.525398405766, 0.841706377792),
    (0.47641545769, 0.837505914975), (0.41379548902, 0.810045601727),
    (0.380084785646, 0.749979603086), (0.477955996282, 0.74513234612),
    (0.523389793327, 0.748924302636), (0.571057789237, 0.74332894691),
    (0.672409137852, 0.744177032192), (0.572539621444, 0.776609286626),
    (0.5240106503, 0.783370783245), (0.477561227414, 0.778476346951)])

# 3x3 float32 matrix kept from the upstream file; nothing in the visible
# part of this module reads it.
# NOTE(review): the name suggests an inverse transform derived from
# TEMPLATE -- confirm against upstream before relying on it.
INV_TEMPLATE = np.float32([
    (-0.04099179660567834, -0.008425234314031194, 2.575498465013183),
    (0.04062510634554352, -0.009678089746831375, -1.2534351452524177),
    (0.0003666902601348179, 0.01810332406086298, -0.32206331976076663)])

# Per-axis extrema of TEMPLATE, used to rescale every coordinate so that
# each axis of MINMAX_TEMPLATE spans exactly [0, 1].
TPL_MIN, TPL_MAX = np.min(TEMPLATE, axis=0), np.max(TEMPLATE, axis=0)
MINMAX_TEMPLATE = (TEMPLATE - TPL_MIN) / (TPL_MAX - TPL_MIN)


class AlignDlib:
    """
    Use `dlib's landmark estimation <http://blog.dlib.net/2014/08/real-time-face-pose-estimation.html>`_ to align faces.

    The alignment preprocesses faces for input into a neural network.
    Faces are resized to the same size (such as 96x96) and transformed
    to make landmarks (such as the eyes and nose) appear at the same
    location on every image.

    Normalized landmarks:

    .. image:: ../images/dlib-landmark-mean.png
    """

    #: Landmark indices corresponding to the inner eyes and bottom lip.
    INNER_EYES_AND_BOTTOM_LIP = [39, 42, 57]

    #: Landmark indices corresponding to the outer eyes and nose.
    OUTER_EYES_AND_NOSE = [36, 45, 33]

    def __init__(self, facePredictor):
        """
        Instantiate an 'AlignDlib' object.

        Creates dlib's frontal face detector and loads the landmark
        (shape) predictor from ``facePredictor``.

        :param facePredictor: The path to dlib's shape predictor model file.
        :type facePredictor: str
        """
        assert facePredictor is not None

        # pylint: disable=no-member
        self.detector = dlib.get_frontal_face_detector()
        self.predictor = dlib.shape_predictor(facePredictor)

    def getAllFaceBoundingBoxes(self, rgbImg):
        """
        Find all face bounding boxes in an image.

        :param rgbImg: RGB image to process. Shape: (height, width, 3)
        :type rgbImg: numpy.ndarray
        :return: All face bounding boxes in an image, or an empty list
                 if the detector raised an exception.
        :rtype: dlib.rectangles
        """
        assert rgbImg is not None

        try:
            # The second argument is passed straight to the dlib detector
            # (its upsampling count, per the dlib documentation).
            return self.detector(rgbImg, 1)
        except Exception as e:  # pylint: disable=broad-except
            # Deliberate best effort: in rare cases the detector throws,
            # and such an image is treated as containing no faces.
            print("Warning: {}".format(e))
            return []

    def getLargestFaceBoundingBox(self, rgbImg, skipMulti=False):
        """
        Find the largest face bounding box in an image.

        "Largest" means the greatest ``width() * height()`` area.

        :param rgbImg: RGB image to process. Shape: (height, width, 3)
        :type rgbImg: numpy.ndarray
        :param skipMulti: Skip image if more than one face detected.
        :type skipMulti: bool
        :return: The largest face bounding box in an image, or None when
                 no face is found or when ``skipMulti`` is set and more
                 than one face is found.
        :rtype: dlib.rectangle
        """
        assert rgbImg is not None

        faces = self.getAllFaceBoundingBoxes(rgbImg)
        faceCount = len(faces)
        if faceCount == 0:
            return None
        if skipMulti and faceCount > 1:
            return None
        return max(faces, key=lambda rect: rect.width() * rect.height())

    def findLandmarks(self, rgbImg, bb):
        """
        Find the landmarks of a face.

        :param rgbImg: RGB image to process. Shape: (height, width, 3)
        :type rgbImg: numpy.ndarray
        :param bb: Bounding box around the face to find landmarks for.
        :type bb: dlib.rectangle
        :return: Detected landmark locations.
        :rtype: list of (x,y) tuples
        """
        assert rgbImg is not None
        assert bb is not None

        points = self.predictor(rgbImg, bb)
        return [(p.x, p.y) for p in points.parts()]

    # The default list below is only read, never mutated.
    # pylint: disable=dangerous-default-value
    def align(self, imgDim, rgbImg, bb=None,
              landmarks=None, landmarkIndices=INNER_EYES_AND_BOTTOM_LIP,
              skipMulti=False, scale=1.0):
        r"""align(imgDim, rgbImg, bb=None, landmarks=None, landmarkIndices=INNER_EYES_AND_BOTTOM_LIP, skipMulti=False, scale=1.0)

        Transform and align a face in an image.

        :param imgDim: The edge length in pixels of the square the image is resized to.
        :type imgDim: int
        :param rgbImg: RGB image to process. Shape: (height, width, 3)
        :type rgbImg: numpy.ndarray
        :param bb: Bounding box around the face to align. \
                   Defaults to the largest face.
        :type bb: dlib.rectangle
        :param landmarks: Detected landmark locations. \
                          Landmarks found on `bb` if not provided.
        :type landmarks: list of (x,y) tuples
        :param landmarkIndices: The indices to transform to.
        :type landmarkIndices: list of ints
        :param skipMulti: Skip image if more than one face detected.
        :type skipMulti: bool
        :param scale: Scale image before cropping to the size given by imgDim.
        :type scale: float
        :return: The aligned RGB image. Shape: (imgDim, imgDim, 3). \
                 None if `bb` is not given and no usable face is found.
        :rtype: numpy.ndarray
        """
        assert imgDim is not None
        assert rgbImg is not None
        assert landmarkIndices is not None

        if bb is None:
            bb = self.getLargestFaceBoundingBox(rgbImg, skipMulti)
        if bb is None:
            # No (single) face available: nothing to align.
            return None

        if landmarks is None:
            landmarks = self.findLandmarks(rgbImg, bb)

        npLandmarks = np.float32(landmarks)
        npLandmarkIndices = np.array(landmarkIndices)

        # Where the three anchor landmarks should land in the output:
        # the [0, 1] template stretched to imgDim, shrunk by `scale`, and
        # re-centred by shifting half of the space freed by the shrink.
        # Pinned to float32 because cv2.getAffineTransform only accepts
        # float32 point sets; NumPy-scalar imgDim/scale values could
        # otherwise promote the expression to float64.
        srcPoints = npLandmarks[npLandmarkIndices]
        dstPoints = np.asarray(
            imgDim * MINMAX_TEMPLATE[npLandmarkIndices] * scale
            + imgDim * (1 - scale) / 2,
            dtype=np.float32)

        # pylint: disable=maybe-no-member
        H = cv2.getAffineTransform(srcPoints, dstPoints)
        thumbnail = cv2.warpAffine(rgbImg, H, (imgDim, imgDim))

        return thumbnail
0 commit comments