Elasticsearch — how to improve face recognition accuracy with FaceNet

# Query Elasticsearch for the stored faces most similar to ``wanted_encoding``.
# Similarity is scored as 1 / (1 + L2 distance), so an exact match scores 1.0
# and ``min_score`` turns the caller's ``tolerance`` into a hard score cutoff,
# letting Elasticsearch drop weak matches server-side.
s_body = {
    "size": app_conf.ELASTIC_MAX_RESULTS,   # cap on the number of returned hits
    "min_score": 1 - tolerance,             # discard faces beyond the tolerance
    "query": {
        "script_score": {
            "query": {"match_all": {}},
            "script": {
                # l2norm() is the Painless helper for dense_vector fields;
                # 'face_vector' is the indexed encoding field.
                "source": "1 / (1 + l2norm(params.query_vector, 'face_vector'))",
                "params": {"query_vector": wanted_encoding},
            },
        }
    },
}
res = es.search(index="idx", body=s_body)
for hit in res["hits"]["hits"]:
    ...  # each hit carries the stored document plus its similarity _score
def align_face(img, box_index=0, eyes=None, checksideview=True):
    """Rotate a face image so that the eyes sit on a horizontal line.

    NOTE(review): the original ``def`` line was lost in extraction; this
    header (name, parameter order, defaults) is reconstructed from the
    docstring and the body -- confirm against the callers.

    Args:
        img (np.ndarray): base input photo.
        box_index (int): index of the face box when the image holds several faces.
        eyes (list): per-box landmark tuples
            (left_eye (x, y), right_eye (x, y), left_chin (x, y), right_chin (x, y));
            detected with ``mp_boxes`` when not supplied.
        checksideview (bool): when True, reject faces turned too far sideways.

    Returns:
        (bool, np.ndarray): ``(False, img)`` when the face is too much of a
        side view; otherwise ``(True, image)`` where the image is rotated
        upright when the tilt exceeds ``app_conf.MIN_FACE_ROTATION`` degrees,
        or the unchanged input when the tilt is small or no eyes were found.
    """
    reload(app_conf)
    if not eyes:
        # no eye data supplied by the caller -- detect landmarks ourselves
        boxes, eyes = mp_boxes(img, fl_use_dlib=True, fl_use_mediapipe=False,
                               fl_verbous=False)
    if not eyes:
        return True, img  # no face/eyes found: nothing to correct

    left_eye_x, left_eye_y = eyes[box_index][0]
    left_eye = (left_eye_x, left_eye_y)
    right_eye_x, right_eye_y = eyes[box_index][1]
    right_eye = (right_eye_x, right_eye_y)

    if checksideview:
        # Compare eye-to-chin-corner distances on both sides: a strongly
        # asymmetric ratio means the face is turned too far to the side.
        left_chin = tuple(eyes[box_index][2])
        right_chin = tuple(eyes[box_index][3])
        right_dist = euclidean_distance(right_chin, right_eye)
        left_dist = euclidean_distance(left_chin, left_eye)
        if left_dist == 0:  # degenerate landmarks -- treat as a side view
            return False, img
        k_side_face = right_dist / left_dist
        if k_side_face < 0.5 or k_side_face > 2.0:  # thresholds found experimentally
            return False, img  # face is turned sideways too much

    # Build a right triangle on the eye line to measure the tilt angle.
    if left_eye_y > right_eye_y:
        point_3rd = (right_eye_x, left_eye_y)
        direction = -1  # rotate clockwise
    else:
        point_3rd = (left_eye_x, right_eye_y)
        direction = 1  # rotate counter-clockwise

    a = euclidean_distance(eyes[box_index][0], point_3rd)
    b = euclidean_distance(eyes[box_index][1], point_3rd)
    c = euclidean_distance(eyes[box_index][1], eyes[box_index][0])
    # Law of cosines; clip guards arccos against float round-off outside [-1, 1].
    cos_a = np.clip((b * b + c * c - a * a) / (2 * b * c), -1.0, 1.0)
    angle = (np.arccos(cos_a) * 180) / pi

    if angle < app_conf.MIN_FACE_ROTATION:
        # tilt below the configured threshold (e.g. 5 degrees) -- leave as is
        return True, img

    if direction == -1:
        angle = 90 - angle
    new_img = Image.fromarray(img, mode="RGB")
    new_img = np.array(new_img.rotate(direction * angle))
    # NOTE(review): swaps the first and last channels after rotation; the input
    # is built as RGB above, so verify this conversion is really intended.
    new_img = cv2.cvtColor(new_img, cv2.COLOR_BGR2RGB)
    return True, new_img
def mp_boxes(image, confid=0.4, fl_use_dlib=True, fl_use_mediapipe=True, fl_verbous=True):
    """Find all face bounding boxes (and eye/chin landmarks) on an image.

    Mediapipe's CNN detector is roughly 50x faster than dlib even on CPU,
    though it can consume a large amount of RAM.  When dlib is enabled the
    CNN model is tried first on CUDA hardware and the HOG model is the
    fallback; Mediapipe results (if enabled) are appended afterwards.

    Args:
        image (np.ndarray): image in memory to be processed.
        confid (float): Mediapipe minimum detection confidence.
        fl_use_dlib (bool): run the dlib (CNN/HOG) detector.
        fl_use_mediapipe (bool): run the Mediapipe detector.
        fl_verbous (bool): print progress information.

    Returns:
        tuple: ``(boxes, eyes)`` where ``boxes`` is a list of
        ``(top, right, bottom, left)`` coordinates and ``eyes`` holds, per box,
        a (left_eye, right_eye, chin-corner, chin-corner) tuple of points;
        ``(None, None)`` when no face passed the size filter.
    """
    reload(app_conf)
    mod5_68 = "large"  # 68-point landmark model
    boxes = []
    eyes = []

    def _dlib_detect(model):
        # Run face_recognition's detector and keep only boxes at least
        # MIN_FACEBOX_DIM wide/high (boxes come as (top, right, bottom, left)).
        found = frfl(image, number_of_times_to_upsample=1, model=model)
        return [b for b in found
                if (b[1] - b[3]) > app_conf.MIN_FACEBOX_DIM[0]
                and (b[2] - b[0]) > app_conf.MIN_FACEBOX_DIM[1]]

    if fl_use_dlib:
        if DLIB_USE_CUDA:
            # try the CNN model on the GPU first
            try:
                boxes = _dlib_detect("cnn")
                if fl_verbous:
                    print("[INFO] %s: dlib CNN being used and found %d face box"
                          % (datetime.now(), len(boxes)))
            except Exception:
                # CNN can fail (e.g. GPU out of memory) -- fall back to HOG
                boxes = _dlib_detect("hog")
                if fl_verbous:
                    print("[INFO] %s: dlib HOG being used and found %d face box"
                          % (datetime.now(), len(boxes)))
        else:
            boxes = _dlib_detect("hog")
            if fl_verbous:
                print("[INFO] %s: dlib HOG being used and found %d face box"
                      % (datetime.now(), len(boxes)))

    if fl_use_mediapipe:
        face_detection = mp_face_detection.FaceDetection(min_detection_confidence=confid)
        iheigth, iwidth = image.shape[:2]
        results = face_detection.process(image)
        if results.detections:
            for d in results.detections:
                # Mediapipe returns relative coordinates; scale to pixels.
                rel = d.location_data.relative_bounding_box
                xmin = int(iwidth * rel.xmin)
                ymin = int(iheigth * rel.ymin)
                ymax = ymin + int(iheigth * rel.height)
                xmax = xmin + int(iwidth * rel.width)
                if ((xmax - xmin) > app_conf.MIN_FACEBOX_DIM[0]
                        and (ymax - ymin) > app_conf.MIN_FACEBOX_DIM[1]):
                    # store in face_recognition's (top, right, bottom, left) order
                    boxes.append([ymin, xmax, ymax, xmin])
        elif fl_verbous:
            print("[INFO] %s: Mediapipe found nothing :(" % datetime.now())
        del face_detection
        del results

    if not boxes:
        return None, None

    for box in boxes:
        basic_landmarks = frfland(image, [box], mod5_68)
        # one reference point per feature: inner eye points plus both chin corners
        eyes.append((basic_landmarks[0]["left_eye"][2],
                     basic_landmarks[0]["right_eye"][1],
                     basic_landmarks[0]["chin"][16],
                     basic_landmarks[0]["chin"][0]))
    if fl_verbous:
        print("[INFO] %s: Mediapipe added some too and now totally %d face boxes are collected."
              % (datetime.now(), len(boxes)))
    return boxes, eyes
# Module-level bindings, fixed from the original text:
#  * ``face_recognition.face_landmarks()`` *called* the function instead of
#    binding it -- ``mp_boxes`` needs the function object itself, and calling
#    it here with no image would raise TypeError at import time;
#  * ``mediaoioe`` was a typo for ``mediapipe``.
frfland = face_recognition.face_landmarks
mp_face_detection = mediapipe.solutions.face_detection
  1. The minimal face box is 30 x 30 pixels.
  2. The maximum picture size is 1280 x 1024 (enforced with cv2.resize). Such reduction is critically important for CUDA picture processing: my NVIDIA card with 4 GB onboard can’t work with bigger files.
  3. I’m using both dlib and Mediapipe face box detection: sometimes it is useful :)

--

--

Get the Medium app

A button that says 'Download on the App Store', and if clicked it will lead you to the iOS App store
A button that says 'Get it on, Google Play', and if clicked it will lead you to the Google Play store