如何使用 OpenCV 进行人脸识别。


第一,检测人脸,并使用深度学习模型为每张人脸提取 128 维嵌入。

第二,在嵌入基础上使用支持向量机(SVM)训练人脸识别模型。

第三,最后使用 OpenCV 识别图像和视频流中的人脸。




# import the necessary packagesimport numpy as npimport pickleimport cv2import osimport os


# File extensions (lower-case, leading dot included) that list_images accepts.
image_types = (".jpg", ".jpeg", ".png", ".bmp", ".tif", ".tiff")


def list_images(basePath, contains=None):
    """Yield the paths of the image files found under ``basePath``.

    Args:
        basePath: Directory that is walked recursively.
        contains: Optional substring that a file name must contain.

    Returns:
        A generator of paths whose extension is listed in ``image_types``.
    """
    return list_files(basePath, validExts=image_types, contains=contains)


def list_files(basePath, validExts=None, contains=None):
    """Recursively yield file paths under ``basePath`` that pass the filters.

    Args:
        basePath: Directory that is walked recursively with ``os.walk``.
        validExts: A single extension or an iterable of extensions (lower-case,
            with the leading dot). ``None`` disables the extension filter.
        contains: Optional substring that a file name must contain.

    Yields:
        ``os.path.join(rootDir, filename)`` for every matching file.
    """
    # str.endswith only accepts a str or a tuple of str, so normalise lists,
    # sets and other iterables up front.
    if validExts is not None and not isinstance(validExts, str):
        validExts = tuple(validExts)

    for (rootDir, _dirNames, filenames) in os.walk(basePath):
        for filename in filenames:
            # Skip files whose name lacks the requested substring.
            if contains is not None and contains not in filename:
                continue

            # Everything from the last dot onwards, lower-cased. A name
            # without a dot has no extension (slicing from rfind's -1 would
            # otherwise return its last character).
            dot = filename.rfind(".")
            ext = filename[dot:].lower() if dot != -1 else ""

            if validExts is None or ext.endswith(validExts):
                yield os.path.join(rootDir, filename)


def resize(image, width=None, height=None, inter=cv2.INTER_AREA):
    """Resize ``image`` to a target width or height, keeping the aspect ratio.

    Args:
        image: Image array; ``image.shape[:2]`` is read as (height, width).
        width: Target width in pixels. Takes precedence over ``height``.
        height: Target height in pixels; only used when ``width`` is None.
        inter: Interpolation flag forwarded to ``cv2.resize``.

    Returns:
        ``image`` itself when both ``width`` and ``height`` are None,
        otherwise the array returned by ``cv2.resize``.
    """
    dim = None
    (h, w) = image.shape[:2]

    # No target size requested: return the input untouched.
    if width is None and height is None:
        return image

    if width is None:
        # Scale by the height ratio and derive the width from it.
        r = height / float(h)
        dim = (int(w * r), height)
    else:
        # Scale by the width ratio and derive the height from it.
        r = width / float(w)
        dim = (width, int(h * r))

    resized = cv2.resize(image, dim, interpolation=inter)
    return resized










# NOTE(review): detector_path, embedding_model, dataset_path, confidence_low
# and embeddings_path are read below but are not assigned anywhere in this
# snippet; they must be defined before this point.

# Load the serialized Caffe face detector.
print("loading face detector...")
protoPath = os.path.sep.join([detector_path, "deploy.proto.txt"])
modelPath = os.path.sep.join(
    [detector_path, "res10_300x300_ssd_iter_140000_fp16.caffemodel"])
detector = cv2.dnn.readNetFromCaffe(protoPath, modelPath)

# Load the serialized Torch face embedding model.
print("loading face recognizer...")
embedder = cv2.dnn.readNetFromTorch(embedding_model)

# Collect the paths of the input images in the dataset.
print("quantifying faces...")
imagePaths = list(list_images(dataset_path))

# Extracted face embeddings and the matching person names.
knownEmbeddings = []
knownNames = []

# Number of faces processed so far.
total = 0

for (i, imagePath) in enumerate(imagePaths):
    print("processing image {}/{}".format(i + 1, len(imagePaths)))
    # The name of the image's parent directory is used as the person's name.
    name = imagePath.split(os.path.sep)[-2]

    image = cv2.imread(imagePath)
    if image is None:
        # cv2.imread returns None when the file cannot be read or decoded.
        print("skipping unreadable image {}".format(imagePath))
        continue

    # Resize to a width of 600 pixels (aspect ratio preserved) and grab the
    # resulting dimensions.
    image = resize(image, width=600)
    (h, w) = image.shape[:2]

    # Build the input blob for the face detector.
    imageBlob = cv2.dnn.blobFromImage(
        cv2.resize(image, (300, 300)), 1.0, (300, 300),
        (104.0, 177.0, 123.0), swapRB=False, crop=False)

    # Run the face detector on the image.
    detector.setInput(imageBlob)
    detections = detector.forward()

    # The detections are indexed along the third axis below, so that is the
    # axis that has to be non-empty (len() would test the first axis).
    if detections.shape[2] == 0:
        continue

    # Each image is assumed to contain one face, so keep only the detection
    # with the highest confidence.
    best = np.argmax(detections[0, 0, :, 2])
    confidence = detections[0, 0, best, 2]

    # Filter out weak detections.
    if confidence <= confidence_low:
        continue

    # Scale the box to pixel coordinates and clamp it to the image so that
    # negative or out-of-range values cannot produce a wrong slice.
    box = detections[0, 0, best, 3:7] * np.array([w, h, w, h])
    (startX, startY, endX, endY) = box.astype("int")
    startX, startY = max(0, int(startX)), max(0, int(startY))
    endX, endY = min(w, int(endX)), min(h, int(endY))

    # Extract the face ROI and skip faces that are too small.
    face = image[startY:endY, startX:endX]
    (fH, fW) = face.shape[:2]
    if fW < 20 or fH < 20:
        continue

    # Build a blob from the face ROI and pass it through the embedding model
    # to obtain the face's embedding vector.
    faceBlob = cv2.dnn.blobFromImage(
        face, 1.0 / 255, (96, 96), (0, 0, 0), swapRB=True, crop=False)
    embedder.setInput(faceBlob)
    vec = embedder.forward()

    # Record the person's name together with the face embedding.
    knownNames.append(name)
    knownEmbeddings.append(vec.flatten())
    total += 1

# Serialize the embeddings and names to disk.
print("serializing {} encodings...".format(total))
data = {"embeddings": knownEmbeddings, "names": knownNames}
with open(embeddings_path, "wb") as f:
    f.write(pickle.dumps(data))





遍历 imagePaths。从路径中提取人名。

构造了一个 blob。

然后,将 imageBlob 输入检测器网络,以检测图像中的人脸。


假设我们至少有一个检测,将进入 if 语句的主体。


假设置信度已经超过阈值 confidence_low,提取面部 ROI 并抓取/检查尺寸以确保面部 ROI 足够大。

然后,我们将利用编码器 embedder 提取人脸编码。

继续构建另一个 blob。

随后,将 faceBlob 输入编码器 embedder。这会生成一个 128 维向量 (vec) 来描述面部。

然后我们简单地将名称和嵌入 vec 分别添加到 knownNames 和 knownEmbeddings 中。




已经为每张脸提取了 128 维编码--但是我们如何根据这些嵌入来识别一个人呢?

答案是我们需要在嵌入之上训练一个"标准"机器学习模型(例如 SVM、k-NN 分类器、随机森林等)。


打开 train_face.py 文件并插入以下代码:

import pickle

from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC

# Input: serialized face embeddings. Outputs: trained recognizer and the
# label encoder.
embeddings_path = 'output/embeddings.pickle'
recognizer_path = 'output/recognizer.pickle'
lable_path = 'output/le.pickle'

# Load the face embeddings.
# NOTE: pickle can execute arbitrary code while loading; only load files that
# were produced by a trusted run of the extraction script.
print("[INFO] loading face embeddings...")
with open(embeddings_path, "rb") as f:
    data = pickle.loads(f.read())

# Encode the person names as integer labels.
print("[INFO] encoding labels...")
le = LabelEncoder()
labels = le.fit_transform(data["names"])

# Train the model that takes the face embeddings and produces the actual
# face recognition.
recognizer = SVC(C=1.0, kernel="linear", probability=True)
recognizer.fit(data["embeddings"], labels)

# Save the trained recognizer.
with open(recognizer_path, "wb") as f:
    f.write(pickle.dumps(recognizer))

# Save the label encoder.
with open(lable_path, "wb") as f:
    f.write(pickle.dumps(le))

导入包和模块。 我们将使用 scikit-learn 的支持向量机 (SVM) 实现,这是一种常见的机器学习模型。


  • embeddings_path:序列化编码。

  • recognizer_path:这将是我们识别人脸的输出模型。 它基于 SVM。

  • lable_path:标签编码器输出文件路径


然后初始化 scikit-learn LabelEncoder 并编码名称标签。

训练模型。本文使用的是线性支持向量机 (SVM),但如果您愿意,您可以尝试使用其他机器学习模型进行试验。


运行train_face.py 脚本。



import numpy as npimport pickleimport cv2import os


def resize(image, width=None, height=None, inter=cv2.INTER_AREA):
    """Scale ``image`` to the given width or height, preserving aspect ratio.

    Args:
        image: Image array; ``image.shape[:2]`` is read as (height, width).
        width: Desired width in pixels. When given, ``height`` is ignored.
        height: Desired height in pixels; used only if ``width`` is None.
        inter: Interpolation flag handed to ``cv2.resize``.

    Returns:
        The original ``image`` if neither ``width`` nor ``height`` is given,
        otherwise the output of ``cv2.resize``.
    """
    src_h, src_w = image.shape[:2]

    # Nothing requested: hand the input back as-is.
    if width is None and height is None:
        return image

    if width is not None:
        # Width drives the scale; the height follows from the ratio.
        scale = width / float(src_w)
        target = (width, int(src_h * scale))
    else:
        # Only a height was given; the width follows from the ratio.
        scale = height / float(src_h)
        target = (int(src_w * scale), height)

    return cv2.resize(image, target, interpolation=inter)


# Settings for recognizing faces in a single image.
image_path = '11.jpg'  # input image
detector_path = 'face_dete_model'  # directory holding the face detector files
embedding_path = 'nn4.small2.v1.t7'  # face embedding model
recognizer_path = 'output/recognizer.pickle'  # pickled recognizer
label_path = 'output/le.pickle'  # pickled label encoder
confidence_low = 0.5  # detections at or below this confidence are ignored


  1. image_path :输入图像的路径。

  2. detector_path:OpenCV 深度学习人脸检测器的路径。 使用这个模型来检测人脸 ROI 在图像中的位置。

  3. embedding_path : OpenCV 深度学习人脸编码模型的路径。 我们将使用这个模型从人脸 ROI 中提取 128 维人脸嵌入--然后将把数据输入到识别器中。

  4. recognizer_path :识别器模型的路径。

  5. label_path : 标签编码器的路径。

  6. confidence_low:过滤弱人脸检测的可选阈值。


# Load the serialized Caffe face detector.
print("[INFO] loading face detector...")
protoPath = os.path.sep.join([detector_path, "deploy.proto.txt"])
modelPath = os.path.sep.join(
    [detector_path, "res10_300x300_ssd_iter_140000_fp16.caffemodel"])
detector = cv2.dnn.readNetFromCaffe(protoPath, modelPath)

# Load the serialized Torch face embedding model.
print("[INFO] loading face recognizer...")
embedder = cv2.dnn.readNetFromTorch(embedding_path)

# Load the trained recognizer and the label encoder.
# NOTE: pickle can execute arbitrary code while loading; only load files that
# were produced by a trusted training run.
with open(recognizer_path, "rb") as f:
    recognizer = pickle.loads(f.read())
with open(label_path, "rb") as f:
    le = pickle.loads(f.read())

# Load the image; cv2.imread returns None when the file cannot be read.
image = cv2.imread(image_path)
if image is None:
    raise IOError("could not read image '{}'".format(image_path))

# Resize to a width of 600 pixels (aspect ratio preserved) and grab the
# resulting dimensions.
image = resize(image, width=600)
(h, w) = image.shape[:2]

# Build the input blob for the face detector.
imageBlob = cv2.dnn.blobFromImage(
    cv2.resize(image, (300, 300)), 1.0, (300, 300),
    (104.0, 177.0, 123.0), swapRB=False, crop=False)

# Run the face detector on the image.
detector.setInput(imageBlob)
detections = detector.forward()

for i in range(0, detections.shape[2]):
    # Confidence associated with this detection.
    confidence = detections[0, 0, i, 2]

    # Filter out weak detections.
    if confidence <= confidence_low:
        continue

    # Scale the box to pixel coordinates and clamp it to the image so that
    # negative or out-of-range values cannot produce a wrong slice.
    box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
    (startX, startY, endX, endY) = box.astype("int")
    startX, startY = max(0, int(startX)), max(0, int(startY))
    endX, endY = min(w, int(endX)), min(h, int(endY))

    # Extract the face ROI and skip faces that are too small.
    face = image[startY:endY, startX:endX]
    (fH, fW) = face.shape[:2]
    if fW < 20 or fH < 20:
        continue

    # Build a blob from the face ROI and pass it through the embedding model
    # to obtain the face's embedding vector.
    faceBlob = cv2.dnn.blobFromImage(
        face, 1.0 / 255, (96, 96), (0, 0, 0), swapRB=True, crop=False)
    embedder.setInput(faceBlob)
    vec = embedder.forward()

    # Classify the embedding and keep the most probable label.
    preds = recognizer.predict_proba(vec)[0]
    j = np.argmax(preds)
    proba = preds[j]
    name = le.classes_[j]

    # Draw the bounding box and the label; the text goes above the box unless
    # that would place it too close to the top edge.
    text = "{}: {:.2f}%".format(name, proba * 100)
    y = startY - 10 if startY - 10 > 10 else startY + 10
    cv2.rectangle(image, (startX, startY), (endX, endY), (0, 0, 255), 2)
    cv2.putText(image, text, (startX, y),
                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

# Show the result until a key is pressed.
cv2.imshow("Image", image)
cv2.waitKey(0)

我们在这个块中加载三个模型。 冒着冗余的风险,我想明确提醒您模型之间的差异:

  • 检测器:一个预训练的 Caffe DL 模型,用于检测人脸在图像中的位置。

  • embedder:一个预训练的 Torch DL 模型,用于计算我们的 128-D 人脸嵌入。

  • 识别器:线性 SVM 人脸识别模型。

前两个模型(检测器和 embedder)都是预先训练好的,这意味着它们是由 OpenCV 按原样提供给您的。


将图像加载到内存中并构建一个 blob。


您应该能认出这段代码,它与步骤 1 中的相同。我在这里再解释一遍:


然后将置信度与最小概率检测阈值 confidence_low 进行比较,确保计算出的概率大于该阈值。

我们提取人脸 ROI并确保它的空间维度足够大。

下面是识别人脸 ROI 的代码:

首先,构建一个 faceBlob,并将其输入编码器,以生成描述面部的 128 维向量。

然后,我们将 vec 通过我们的 SVM 识别器模型,其结果是我们对面部 ROI 中的人的预测。










import numpy as np
import pickle
import time
import cv2
import os


def resize(image, width=None, height=None, inter=cv2.INTER_AREA):
    """Resize ``image`` to a target width or height, keeping the aspect ratio.

    Args:
        image: Image array; ``image.shape[:2]`` is read as (height, width).
        width: Target width in pixels. Takes precedence over ``height``.
        height: Target height in pixels; only used when ``width`` is None.
        inter: Interpolation flag forwarded to ``cv2.resize``.

    Returns:
        ``image`` itself when both ``width`` and ``height`` are None,
        otherwise the array returned by ``cv2.resize``.
    """
    dim = None
    (h, w) = image.shape[:2]

    # No target size requested: return the input untouched.
    if width is None and height is None:
        return image

    if width is None:
        # Scale by the height ratio and derive the width from it.
        r = height / float(h)
        dim = (int(w * r), height)
    else:
        # Scale by the width ratio and derive the height from it.
        r = width / float(w)
        dim = (width, int(h * r))

    resized = cv2.resize(image, dim, interpolation=inter)
    return resized


# Settings for recognizing faces in a video.
out_put = 'output.avi'  # annotated output video; None disables writing
video_path = '1.mp4'  # input video
detector_path = 'face_dete_model'  # directory holding the face detector files
embedding_path = 'nn4.small2.v1.t7'  # face embedding model
recognizer_path = 'output/recognizer.pickle'  # pickled recognizer
label_path = 'output/le.pickle'  # pickled label encoder
confidence_low = 0.5  # detections at or below this confidence are ignored

# Load the serialized Caffe face detector from disk.
print("[INFO] loading face detector...")
protoPath = os.path.sep.join([detector_path, "deploy.proto.txt"])
modelPath = os.path.sep.join(
    [detector_path, "res10_300x300_ssd_iter_140000_fp16.caffemodel"])
detector = cv2.dnn.readNetFromCaffe(protoPath, modelPath)

# Load the serialized Torch face embedding model from disk.
print("[INFO] loading face recognizer...")
embedder = cv2.dnn.readNetFromTorch(embedding_path)

# Load the trained recognizer along with the label encoder.
# NOTE: pickle can execute arbitrary code while loading; only load files that
# were produced by a trusted training run.
with open(recognizer_path, "rb") as f:
    recognizer = pickle.loads(f.read())
with open(label_path, "rb") as f:
    le = pickle.loads(f.read())

# Open the video source. To read from a webcam instead, use
# cv2.VideoCapture(0); the short sleep gives a camera sensor time to warm up.
print("[INFO] starting video stream...")
vs = cv2.VideoCapture(video_path)
time.sleep(2.0)

# The video writer is created lazily, once the first frame has to be written.
writer = None

try:
    while True:
        # Grab the next frame; stop when the stream is exhausted.
        ret_val, frame = vs.read()
        if not ret_val:
            break

        # Resize the frame to a width of 600 pixels (aspect ratio preserved)
        # and grab the resulting dimensions.
        frame = resize(frame, width=600)
        (h, w) = frame.shape[:2]

        # Build the input blob for the face detector.
        imageBlob = cv2.dnn.blobFromImage(
            cv2.resize(frame, (300, 300)), 1.0, (300, 300),
            (104.0, 177.0, 123.0), swapRB=False, crop=False)

        # Run the face detector on the frame.
        detector.setInput(imageBlob)
        detections = detector.forward()

        # Set once at least one face has been drawn onto this frame.
        annotated = False

        for i in range(0, detections.shape[2]):
            # Confidence associated with this detection.
            confidence = detections[0, 0, i, 2]

            # Filter out weak detections.
            if confidence <= confidence_low:
                continue

            # Scale the box to pixel coordinates and clamp it to the frame so
            # that negative or out-of-range values cannot produce a wrong
            # slice.
            box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
            (startX, startY, endX, endY) = box.astype("int")
            startX, startY = max(0, int(startX)), max(0, int(startY))
            endX, endY = min(w, int(endX)), min(h, int(endY))

            # Extract the face ROI and skip faces that are too small.
            face = frame[startY:endY, startX:endX]
            (fH, fW) = face.shape[:2]
            if fW < 20 or fH < 20:
                continue

            # Build a blob from the face ROI and pass it through the
            # embedding model to obtain the face's embedding vector.
            faceBlob = cv2.dnn.blobFromImage(
                face, 1.0 / 255, (96, 96), (0, 0, 0),
                swapRB=True, crop=False)
            embedder.setInput(faceBlob)
            vec = embedder.forward()

            # Classify the embedding and keep the most probable label.
            preds = recognizer.predict_proba(vec)[0]
            j = np.argmax(preds)
            proba = preds[j]
            name = le.classes_[j]

            # Draw the bounding box and the label; the text goes above the
            # box unless that would place it too close to the top edge.
            text = "{}: {:.2f}%".format(name, proba * 100)
            y = startY - 10 if startY - 10 > 10 else startY + 10
            cv2.rectangle(frame, (startX, startY), (endX, endY),
                          (0, 0, 255), 2)
            cv2.putText(frame, text, (startX, y),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.45, (0, 0, 255), 2)
            annotated = True

        # Write frames on which a face was recognized, exactly once per
        # frame. Writing inside the detection loop would store the same
        # frame once for every face found in it.
        if annotated and out_put is not None:
            if writer is None:
                fourcc = cv2.VideoWriter_fourcc(*"MJPG")
                writer = cv2.VideoWriter(
                    out_put, fourcc, 20,
                    (frame.shape[1], frame.shape[0]), True)
            writer.write(frame)

        # Show the output frame.
        cv2.imshow("Frame", frame)
        key = cv2.waitKey(1) & 0xFF

        # If the `q` key was pressed, break from the loop.
        if key == ord("q"):
            break
finally:
    # Release the window, the capture and the writer even if a frame failed,
    # so that the output file is finalized.
    cv2.destroyAllWindows()
    vs.release()
    if writer is not None:
        writer.release()



