Skip to content

Commit fad43bd

Browse files
committed
Added embedding / facial recognition sample.
Reorganise structure
1 parent 017c9db commit fad43bd

26 files changed

+2972
-1710
lines changed

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,4 @@
11
*.ipynb_checkpoints*
22
ComputerVision/logs/
3+
ComputerVision/FacialRecognition/output/
4+
ComputerVision/Keras MNIST/logs/
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
env/
2+
data/*
3+
etc/*
4+
.idea/*
5+
.DS_Store
6+
*.pyc
7+
**/__pycache__/
8+
9+
!data/.gitkeep
10+
!etc/.gitkeep
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
# CPU image: TensorFlow base plus OpenCV 3.2.0 (with the contrib modules) and
# dlib 19.4, both built from source for Python 3.
#
# NOTE(review): `latest` is a moving tag, while libjasper-dev and libpng12-dev
# below only exist on older Ubuntu releases -- consider pinning the base image
# to the tag this file was last built against.
FROM tensorflow/tensorflow:latest

# Refresh the package index and install in ONE layer, so a cached
# `apt-get update` layer can never be combined with a changed package list.
# `unzip` is listed explicitly because the OpenCV step below depends on it.
RUN apt-get update -y --fix-missing \
    && apt-get install -y \
        ffmpeg \
        build-essential cmake pkg-config \
        libjpeg8-dev libtiff5-dev libjasper-dev libpng12-dev \
        libavcodec-dev libavformat-dev libswscale-dev libv4l-dev \
        libxvidcore-dev libx264-dev \
        libgtk-3-dev \
        libatlas-base-dev gfortran \
        libboost-all-dev \
        python3 python3-dev python3-numpy \
        wget unzip vim python3-tk python3-pip

WORKDIR /

# Download and unpack the OpenCV 3.2.0 sources and the matching contrib modules.
RUN wget -O opencv.zip https://github.com/Itseez/opencv/archive/3.2.0.zip \
    && unzip opencv.zip \
    && wget -O opencv_contrib.zip https://github.com/Itseez/opencv_contrib/archive/3.2.0.zip \
    && unzip opencv_contrib.zip

# Build and install OpenCV 3.2.0 for Python 3 (FFmpeg enabled, CUDA disabled).
# The job count follows the build host instead of a hard-coded -j8.
RUN cd /opencv-3.2.0/ \
    && mkdir build \
    && cd build \
    && cmake -D CMAKE_BUILD_TYPE=RELEASE \
        -D INSTALL_C_EXAMPLES=OFF \
        -D INSTALL_PYTHON_EXAMPLES=ON \
        -D OPENCV_EXTRA_MODULES_PATH=/opencv_contrib-3.2.0/modules \
        -D BUILD_EXAMPLES=OFF \
        -D BUILD_opencv_python2=OFF \
        -D BUILD_NEW_PYTHON_SUPPORT=ON \
        -D CMAKE_INSTALL_PREFIX=$(python3 -c "import sys; print(sys.prefix)") \
        -D PYTHON_EXECUTABLE=$(which python3) \
        -D WITH_FFMPEG=1 \
        -D WITH_CUDA=0 \
        .. \
    && make -j"$(nproc)" \
    && make install \
    && ldconfig \
    && rm /opencv.zip \
    && rm /opencv_contrib.zip

# Install dlib 19.4
RUN wget -O dlib-19.4.tar.bz2 http://dlib.net/files/dlib-19.4.tar.bz2 \
    && tar -vxjf dlib-19.4.tar.bz2

# Build the dlib examples, then install the Python bindings.
# WORKDIR is a Dockerfile instruction, not a shell variable, so the former
# `cd $WORKDIR` expanded to a bare `cd`; return to / explicitly instead.
RUN cd dlib-19.4 \
    && cd examples \
    && mkdir build \
    && cd build \
    && cmake .. \
    && cmake --build . --config Release \
    && cd /dlib-19.4 \
    && pip3 install setuptools \
    && python3 setup.py install \
    && cd / \
    && rm /dlib-19.4.tar.bz2

# COPY is the instruction intended for plain local files. The path is relative
# to the build context; $PWD is not defined while the Dockerfile is processed.
COPY requirements.txt /requirements.txt
RUN pip3 install -r /requirements.txt

CMD ["/bin/bash"]
68+
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
# GPU image: TensorFlow GPU base plus OpenCV 3.2.0 (with the contrib modules,
# CUDA enabled) and dlib 19.4, both built from source for Python 3.
#
# NOTE(review): `latest-gpu` is a moving tag, while libjasper-dev and
# libpng12-dev below only exist on older Ubuntu releases -- consider pinning
# the base image to the tag this file was last built against.
FROM tensorflow/tensorflow:latest-gpu

# Refresh the package index and install in ONE layer, so a cached
# `apt-get update` layer can never be combined with a changed package list.
# `unzip` is listed explicitly because the OpenCV step below depends on it.
RUN apt-get update -y --fix-missing \
    && apt-get install -y \
        ffmpeg \
        build-essential cmake pkg-config \
        libjpeg8-dev libtiff5-dev libjasper-dev libpng12-dev \
        libavcodec-dev libavformat-dev libswscale-dev libv4l-dev \
        libxvidcore-dev libx264-dev \
        libgtk-3-dev \
        libatlas-base-dev gfortran \
        libboost-all-dev \
        python3 python3-dev python3-numpy \
        wget unzip vim python3-tk python3-pip

WORKDIR /

# Download and unpack the OpenCV 3.2.0 sources and the matching contrib modules.
RUN wget -O opencv.zip https://github.com/Itseez/opencv/archive/3.2.0.zip \
    && unzip opencv.zip \
    && wget -O opencv_contrib.zip https://github.com/Itseez/opencv_contrib/archive/3.2.0.zip \
    && unzip opencv_contrib.zip

# Build and install OpenCV 3.2.0 for Python 3 (FFmpeg and CUDA enabled).
# The job count follows the build host instead of a hard-coded -j8.
RUN cd /opencv-3.2.0/ \
    && mkdir build \
    && cd build \
    && cmake -D CMAKE_BUILD_TYPE=RELEASE \
        -D INSTALL_C_EXAMPLES=OFF \
        -D INSTALL_PYTHON_EXAMPLES=ON \
        -D OPENCV_EXTRA_MODULES_PATH=/opencv_contrib-3.2.0/modules \
        -D BUILD_EXAMPLES=OFF \
        -D BUILD_opencv_python2=OFF \
        -D BUILD_NEW_PYTHON_SUPPORT=ON \
        -D CMAKE_INSTALL_PREFIX=$(python3 -c "import sys; print(sys.prefix)") \
        -D PYTHON_EXECUTABLE=$(which python3) \
        -D WITH_FFMPEG=1 \
        -D WITH_CUDA=1 \
        .. \
    && make -j"$(nproc)" \
    && make install \
    && ldconfig \
    && rm /opencv.zip \
    && rm /opencv_contrib.zip

# Install dlib 19.4
RUN wget -O dlib-19.4.tar.bz2 http://dlib.net/files/dlib-19.4.tar.bz2 \
    && tar -vxjf dlib-19.4.tar.bz2

# Build the dlib examples, then install the Python bindings.
# WORKDIR is a Dockerfile instruction, not a shell variable, so the former
# `cd $WORKDIR` expanded to a bare `cd`; return to / explicitly instead.
RUN cd dlib-19.4 \
    && cd examples \
    && mkdir build \
    && cd build \
    && cmake .. \
    && cmake --build . --config Release \
    && cd /dlib-19.4 \
    && pip3 install setuptools \
    && python3 setup.py install \
    && cd / \
    && rm /dlib-19.4.tar.bz2

# COPY is the instruction intended for plain local files. The path is relative
# to the build context; $PWD is not defined while the Dockerfile is processed.
COPY requirements-gpu.txt /requirements-gpu.txt
RUN pip3 install -r /requirements-gpu.txt

CMD ["/bin/bash"]
68+
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# Medium-Facenet-Tutorial
2+
3+
Tutorial demonstrating the use of TensorFlow, dlib, and scikit-learn to create a facial recognition pipeline.
4+
5+
https://hackernoon.com/building-a-facial-recognition-pipeline-with-deep-learning-in-tensorflow-66e7645015b8

ComputerVision/FacialRecognition/etc/.gitkeep

Whitespace-only changes.
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
tensorflow-gpu
2+
keras
3+
setuptools
4+
matplotlib
5+
numpy
6+
pandas
7+
jupyter
8+
scikit-learn
9+
h5py
10+
requests
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
tensorflow==1.1.0
2+
scikit-learn==0.18.2
3+
scipy==0.19.1
4+
numpy==1.13.1
5+
requests

ComputerVision/FacialRecognition/source/__init__.py

Whitespace-only changes.
Lines changed: 204 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,204 @@
1+
# Copyright 2015-2016 Carnegie Mellon University
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
"""Module for dlib-based alignment."""
16+
17+
# NOTE: This file has been copied from the openface project.
18+
# https://github.com/cmusatyalab/openface/blob/master/openface/align_dlib.py
19+
20+
import cv2
21+
import dlib
22+
import numpy as np
23+
24+
# Reference landmark layout: 68 (x, y) coordinate pairs stored as float32.
# AlignDlib.align() looks rows up (through MINMAX_TEMPLATE) by landmark index,
# e.g. 39, 42 and 57, to obtain the target position of each landmark.
# NOTE(review): presumably the mean dlib landmark positions shown in
# "dlib-landmark-mean.png" (see the AlignDlib docstring) -- confirm upstream.
TEMPLATE = np.float32([
25+
(0.0792396913815, 0.339223741112), (0.0829219487236, 0.456955367943),
26+
(0.0967927109165, 0.575648016728), (0.122141515615, 0.691921601066),
27+
(0.168687863544, 0.800341263616), (0.239789390707, 0.895732504778),
28+
(0.325662452515, 0.977068762493), (0.422318282013, 1.04329000149),
29+
(0.531777802068, 1.06080371126), (0.641296298053, 1.03981924107),
30+
(0.738105872266, 0.972268833998), (0.824444363295, 0.889624082279),
31+
(0.894792677532, 0.792494155836), (0.939395486253, 0.681546643421),
32+
(0.96111933829, 0.562238253072), (0.970579841181, 0.441758925744),
33+
(0.971193274221, 0.322118743967), (0.163846223133, 0.249151738053),
34+
(0.21780354657, 0.204255863861), (0.291299351124, 0.192367318323),
35+
(0.367460241458, 0.203582210627), (0.4392945113, 0.233135599851),
36+
(0.586445962425, 0.228141644834), (0.660152671635, 0.195923841854),
37+
(0.737466449096, 0.182360984545), (0.813236546239, 0.192828009114),
38+
(0.8707571886, 0.235293377042), (0.51534533827, 0.31863546193),
39+
(0.516221448289, 0.396200446263), (0.517118861835, 0.473797687758),
40+
(0.51816430343, 0.553157797772), (0.433701156035, 0.604054457668),
41+
(0.475501237769, 0.62076344024), (0.520712933176, 0.634268222208),
42+
(0.565874114041, 0.618796581487), (0.607054002672, 0.60157671656),
43+
(0.252418718401, 0.331052263829), (0.298663015648, 0.302646354002),
44+
(0.355749724218, 0.303020650651), (0.403718978315, 0.33867711083),
45+
(0.352507175597, 0.349987615384), (0.296791759886, 0.350478978225),
46+
(0.631326076346, 0.334136672344), (0.679073381078, 0.29645404267),
47+
(0.73597236153, 0.294721285802), (0.782865376271, 0.321305281656),
48+
(0.740312274764, 0.341849376713), (0.68499850091, 0.343734332172),
49+
(0.353167761422, 0.746189164237), (0.414587777921, 0.719053835073),
50+
(0.477677654595, 0.706835892494), (0.522732900812, 0.717092275768),
51+
(0.569832064287, 0.705414478982), (0.635195811927, 0.71565572516),
52+
(0.69951672331, 0.739419187253), (0.639447159575, 0.805236879972),
53+
(0.576410514055, 0.835436670169), (0.525398405766, 0.841706377792),
54+
(0.47641545769, 0.837505914975), (0.41379548902, 0.810045601727),
55+
(0.380084785646, 0.749979603086), (0.477955996282, 0.74513234612),
56+
(0.523389793327, 0.748924302636), (0.571057789237, 0.74332894691),
57+
(0.672409137852, 0.744177032192), (0.572539621444, 0.776609286626),
58+
(0.5240106503, 0.783370783245), (0.477561227414, 0.778476346951)])
59+
60+
# 3x3 float32 matrix; nothing else in the visible code reads it.
# NOTE(review): the name suggests an inverse transform related to TEMPLATE --
# confirm its meaning upstream before relying on it.
INV_TEMPLATE = np.float32([
61+
(-0.04099179660567834, -0.008425234314031194, 2.575498465013183),
62+
(0.04062510634554352, -0.009678089746831375, -1.2534351452524177),
63+
(0.0003666902601348179, 0.01810332406086298, -0.32206331976076663)])
64+
65+
# Column-wise (per-axis) minimum and maximum over all template points.
TPL_MIN, TPL_MAX = np.min(TEMPLATE, axis=0), np.max(TEMPLATE, axis=0)
66+
# Min-max normalise TEMPLATE so that the x and y columns each span [0, 1].
MINMAX_TEMPLATE = (TEMPLATE - TPL_MIN) / (TPL_MAX - TPL_MIN)
67+
68+
69+
class AlignDlib:
    """
    Use `dlib's landmark estimation <http://blog.dlib.net/2014/08/real-time-face-pose-estimation.html>`_ to align faces.

    Alignment preprocesses faces for input into a neural network.
    Faces are resized to the same size (such as 96x96) and transformed
    to make landmarks (such as the eyes and nose) appear at the same
    location on every image.

    Normalized landmarks:

    .. image:: ../images/dlib-landmark-mean.png
    """

    #: Landmark indices corresponding to the inner eyes and bottom lip.
    INNER_EYES_AND_BOTTOM_LIP = [39, 42, 57]

    #: Landmark indices corresponding to the outer eyes and nose.
    OUTER_EYES_AND_NOSE = [36, 45, 33]

    def __init__(self, facePredictor):
        """
        Instantiate an 'AlignDlib' object.

        :param facePredictor: Path to the landmark model file that is handed
                              to ``dlib.shape_predictor``.
        :type facePredictor: str
        """
        assert facePredictor is not None

        # pylint: disable=no-member
        self.detector = dlib.get_frontal_face_detector()
        self.predictor = dlib.shape_predictor(facePredictor)

    def getAllFaceBoundingBoxes(self, rgbImg):
        """
        Find all face bounding boxes in an image.

        Detection is best-effort: if the detector raises, the error is
        printed as a warning and an empty list is returned instead.

        :param rgbImg: RGB image to process. Shape: (height, width, 3)
        :type rgbImg: numpy.ndarray
        :return: All face bounding boxes in an image.
        :rtype: dlib.rectangles
        """
        assert rgbImg is not None

        try:
            # Second argument: number of times dlib upsamples the image
            # before running the detector.
            return self.detector(rgbImg, 1)
        except Exception as e:  # pylint: disable=broad-except
            print("Warning: {}".format(e))
            # In rare cases, exceptions are thrown.
            return []

    def getLargestFaceBoundingBox(self, rgbImg, skipMulti=False):
        """
        Find the largest face bounding box in an image.

        :param rgbImg: RGB image to process. Shape: (height, width, 3)
        :type rgbImg: numpy.ndarray
        :param skipMulti: Skip image if more than one face detected.
        :type skipMulti: bool
        :return: The largest face bounding box in an image, or None when no
                 face is found (or several are found and `skipMulti` is set).
        :rtype: dlib.rectangle
        """
        assert rgbImg is not None

        faces = self.getAllFaceBoundingBoxes(rgbImg)
        if len(faces) == 0 or (skipMulti and len(faces) > 1):
            return None
        # "Largest" means the greatest bounding-box area.
        return max(faces, key=lambda rect: rect.width() * rect.height())

    def findLandmarks(self, rgbImg, bb):
        """
        Find the landmarks of a face.

        :param rgbImg: RGB image to process. Shape: (height, width, 3)
        :type rgbImg: numpy.ndarray
        :param bb: Bounding box around the face to find landmarks for.
        :type bb: dlib.rectangle
        :return: Detected landmark locations.
        :rtype: list of (x,y) tuples
        """
        assert rgbImg is not None
        assert bb is not None

        points = self.predictor(rgbImg, bb)
        return [(p.x, p.y) for p in points.parts()]

    # pylint: disable=dangerous-default-value
    def align(self, imgDim, rgbImg, bb=None,
              landmarks=None, landmarkIndices=INNER_EYES_AND_BOTTOM_LIP,
              skipMulti=False, scale=1.0):
        """align(imgDim, rgbImg, bb=None, landmarks=None, landmarkIndices=INNER_EYES_AND_BOTTOM_LIP, skipMulti=False, scale=1.0)

        Transform and align a face in an image.

        :param imgDim: The edge length in pixels of the square the image is resized to.
        :type imgDim: int
        :param rgbImg: RGB image to process. Shape: (height, width, 3)
        :type rgbImg: numpy.ndarray
        :param bb: Bounding box around the face to align.
                   Defaults to the largest face.
        :type bb: dlib.rectangle
        :param landmarks: Detected landmark locations.
                          Landmarks found on `bb` if not provided.
        :type landmarks: list of (x,y) tuples
        :param landmarkIndices: The indices to transform to. They select the
                                point pairs given to ``cv2.getAffineTransform``,
                                which works on exactly three pairs.
        :type landmarkIndices: list of ints
        :param skipMulti: Skip image if more than one face detected.
        :type skipMulti: bool
        :param scale: Scale image before cropping to the size given by imgDim.
        :type scale: float
        :return: The aligned RGB image. Shape: (imgDim, imgDim, 3).
                 None when `bb` is not given and no usable face is detected.
        :rtype: numpy.ndarray
        """
        assert imgDim is not None
        assert rgbImg is not None
        assert landmarkIndices is not None

        if bb is None:
            bb = self.getLargestFaceBoundingBox(rgbImg, skipMulti)
            if bb is None:
                # Nothing to align: no face, or several faces with skipMulti.
                return None

        if landmarks is None:
            landmarks = self.findLandmarks(rgbImg, bb)

        npLandmarks = np.float32(landmarks)
        npLandmarkIndices = np.array(landmarkIndices)

        # Destination points: the normalised template positions stretched to
        # imgDim * scale and centred inside the imgDim x imgDim output square.
        # pylint: disable=maybe-no-member
        H = cv2.getAffineTransform(npLandmarks[npLandmarkIndices],
                                   imgDim * MINMAX_TEMPLATE[npLandmarkIndices] * scale + imgDim * (1 - scale) / 2)
        thumbnail = cv2.warpAffine(rgbImg, H, (imgDim, imgDim))

        return thumbnail

0 commit comments

Comments
 (0)