Elasticsearch — how to improve face recognition accuracy in FaceNet

Alex Weix
3 min readFeb 20, 2022

From my experience (I’m using face_recognition/dlib for making 128d face encodings) Elasticsearch works perfectly, even much better than the built-in dlib comparison methods:

# Vector search against the stored 128-d 'face_vector' field: the painless
# script turns the L2 distance into a similarity in (0, 1], so "min_score"
# of (1 - tolerance) keeps only sufficiently close faces.
s_body = {
    "size": app_conf.ELASTIC_MAX_RESULTS,
    "min_score": 1 - tolerance,
    "query": {
        "script_score": {
            "query": {"match_all": {}},
            "script": {
                "source": "1 / (1 + l2norm(params.query_vector, 'face_vector'))",
                "params": {"query_vector": wanted_encoding},
            },
        },
    },
}
res = es.search(index="idx", body=s_body)
...
for hit in res["hits"]["hits"]:
    ...

Also, as I already wrote, I’m using the standard pre-trained face_recognition model. However, some preprocessing measures are implemented too:

def align_face(img, box_index=0, eyes=None, checksideview=True):
    """Rotate a face image to an upright position using the eye line.

    NOTE(review): the original ``def`` line was lost in extraction; this
    signature is reconstructed from the docstring parameters and the names
    used in the body -- confirm against the original source.

    Args:
        img (np.ndarray): base input photo.
        box_index (int): index of the face box in a multi-face image.
        eyes (list): per-box landmark tuples
            (left_eye (x, y), right_eye (x, y), right_chin, left_chin);
            detected on the fly via ``mp_boxes`` when omitted.
        checksideview (bool): reject faces turned too far to the side.

    Returns:
        (bool, np.ndarray): ``(False, img)`` when the face is rotated away
        from the camera too much (side view); otherwise ``(True, image)``
        where the image is corrected/rotated to the upright position (or
        the unchanged input when no correction is needed/possible).
    """
    reload(app_conf)
    if not eyes:  # no eyes data provided -- detect landmarks now
        boxes, eyes = mp_boxes(img, fl_use_dlib=True, fl_use_mediapipe=False,
                               fl_verbous=False)
    if not eyes:
        return True, img  # no landmarks found: do nothing

    left_eye_x, left_eye_y = eyes[box_index][0]
    left_eye = (left_eye_x, left_eye_y)
    right_eye_x, right_eye_y = eyes[box_index][1]
    right_eye = (right_eye_x, right_eye_y)

    if checksideview:
        # Check whether the face is rotated away from the camera too much:
        # compare chin-corner-to-eye distances on both sides.
        left_chin = (eyes[box_index][2][0], eyes[box_index][2][1])
        right_chin = (eyes[box_index][3][0], eyes[box_index][3][1])
        right_dist = euclidean_distance(right_chin, right_eye)
        left_dist = euclidean_distance(left_chin, left_eye)
        # NOTE(review): left_dist == 0 would raise ZeroDivisionError here;
        # presumably landmarks never coincide -- confirm.
        k_side_face = right_dist / left_dist
        if 0.5 > k_side_face or k_side_face > 2.0:  # tune per experiments
            return False, img  # face is "from a side" too much

    # Build a right triangle on the eye line to measure the roll angle.
    if left_eye_y > right_eye_y:
        point_3rd = (right_eye_x, left_eye_y)
        direction = -1  # rotate clockwise
    else:
        point_3rd = (left_eye_x, right_eye_y)
        direction = 1  # rotate counter-clockwise

    a = euclidean_distance(eyes[box_index][0], point_3rd)
    b = euclidean_distance(eyes[box_index][1], point_3rd)
    c = euclidean_distance(eyes[box_index][1], eyes[box_index][0])
    # Law of cosines, then radians -> degrees.
    cos_a = (b * b + c * c - a * a) / (2 * b * c)
    angle = (np.arccos(cos_a) * 180) / pi

    if angle < app_conf.MIN_FACE_ROTATION:  # rotation below threshold (~5 deg)
        return True, img
    if direction == -1:
        angle = 90 - angle
    new_img = Image.fromarray(img, mode="RGB")
    new_img = np.array(new_img.rotate(direction * angle))
    # NOTE(review): this BGR<->RGB swap right after a mode="RGB" round-trip
    # looks like it compensates for an OpenCV channel order elsewhere --
    # verify against the callers.
    new_img = cv2.cvtColor(new_img, cv2.COLOR_BGR2RGB)
    return True, new_img

And “eyes” I’m getting from:

def mp_boxes(image, confid=0.4, fl_use_dlib=True, fl_use_mediapipe=True, fl_verbous=True):
    """Find face bounding boxes (and eye/chin landmarks) on the given image.

    Uses the Mediapipe CNN face detector (about 50x faster even on CPU, but
    sometimes consumes a huge amount of RAM) and/or the dlib detectors. With
    dlib, the CNN model is tried on CUDA first and HOG is used as a fallback.

    Args:
        image (np.ndarray): image in memory to be processed.
        confid (float): Mediapipe minimum detection confidence.
        fl_use_dlib (bool): run the dlib (CNN/HOG) detectors.
        fl_use_mediapipe (bool): run the Mediapipe detector.
        fl_verbous (bool): print progress/diagnostic messages.

    Returns:
        (list, list): (boxes, eyes) where each box is (top, right, bottom,
        left) and each eyes entry is a 4-tuple of landmark points
        (left_eye, right_eye, chin[16], chin[0]); (None, None) when no box
        passed the minimum-size filter.
    """
    reload(app_conf)
    mod5_68 = "large"  # landmark model variant passed to frfland
    boxes = []
    eyes = []

    def _keep_big_enough(dboxes):
        # Keep only boxes meeting the MIN_FACEBOX_DIM (width, height) floor.
        # dlib/face_recognition boxes are (top, right, bottom, left).
        return [box for box in dboxes
                if (box[1] - box[3]) > app_conf.MIN_FACEBOX_DIM[0]
                and (box[2] - box[0]) > app_conf.MIN_FACEBOX_DIM[1]]

    if fl_use_dlib:
        if DLIB_USE_CUDA:  # try the CNN model on the GPU first
            try:
                dboxes = frfl(image, number_of_times_to_upsample=1, model="cnn")
                model_used = "CNN"
            except Exception:  # e.g. CUDA out of memory -- fall back to HOG
                dboxes = frfl(image, number_of_times_to_upsample=1, model="hog")
                model_used = "HOG"
        else:
            dboxes = frfl(image, number_of_times_to_upsample=1, model="hog")
            model_used = "HOG"
        boxes.extend(_keep_big_enough(dboxes))
        if fl_verbous:
            print("[INFO] %s: dlib %s being used and found %d face box"
                  % (datetime.now(), model_used, len(boxes)))
        del dboxes

    if fl_use_mediapipe:
        face_detection = mp_face_detection.FaceDetection(min_detection_confidence=confid)
        iheigth, iwidth = image.shape[:2]
        results = face_detection.process(image)
        if results.detections:
            for d in results.detections:
                # Mediapipe returns relative coordinates -- scale to pixels.
                rbox = d.location_data.relative_bounding_box
                xmin = int(iwidth * rbox.xmin)
                ymin = int(iheigth * rbox.ymin)
                ymax = ymin + int(iheigth * rbox.height)
                xmax = xmin + int(iwidth * rbox.width)
                if ((xmax - xmin) > app_conf.MIN_FACEBOX_DIM[0]
                        and (ymax - ymin) > app_conf.MIN_FACEBOX_DIM[1]):
                    # Convert to the dlib (top, right, bottom, left) order.
                    boxes.append([ymin, xmax, ymax, xmin])
        else:
            if fl_verbous:
                print("[INFO] %s: Mediapipe found nothing :(" % datetime.now())
        del face_detection
        del results

    if boxes:
        for box in boxes:
            basic_landmarks = frfland(image, [box], mod5_68)
            eyes.append((basic_landmarks[0]["left_eye"][2],
                         basic_landmarks[0]["right_eye"][1],
                         basic_landmarks[0]["chin"][16],
                         basic_landmarks[0]["chin"][0]))
        if fl_verbous:
            print("[INFO] %s: Mediapipe addded some too and now totally %d face boxes are collected."
                  % (datetime.now(), len(boxes)))
        return boxes, eyes
    return None, None

Where

# Bind the face_recognition landmark detector WITHOUT calling it: the
# original `face_recognition.face_landmarks()` invoked the function with no
# arguments, which raises TypeError at import time. mp_boxes() calls it
# later as frfland(image, [box], mod5_68).
frfland = face_recognition.face_landmarks
# Fixed typo: the module is `mediapipe`, not `mediaoioe`.
mp_face_detection = mediapipe.solutions.face_detection
  1. Minimal facebox is 30 x 30 pixel
  2. Maximum picture size is 1280 x 1024 (using cv2.resize). Such reduction is critically important for the case of CUDA picture processing: my NVIDIA with 4 GB onboard can’t work with bigger files.
  3. I’m using both dlib and Mediapipe face box detection: sometimes it is useful :)

Originally published at https://stack.readbytes.net on February 20, 2022.

--

--