diff --git a/app/fer_recog_realtime_cam.py~ b/app/fer_recog_realtime_cam.py~ deleted file mode 100644 index 21c147c..0000000 --- a/app/fer_recog_realtime_cam.py~ +++ /dev/null @@ -1,202 +0,0 @@ -import os, sys -import numpy as np -import torch -from fer_pytorch.config.default_cfg import get_fer_cfg_defaults -from fer_pytorch.face_detect import MTCNN -from fer_pytorch.face_align.face_align import FaceAlign -from IPython import embed -import shutil -import cv2 -from fer_pytorch.models.build_model import build_model -import argparse -import random -import time -from pathlib import Path -from fer_pytorch.utils.common import setup_seed -from fer_pytorch.datasets.aug import fer_test_aug - -def get_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--config", - default='./configs/mobilev2_4_cls.yml', - type=str, - help="模型配置文件路径") - parser.add_argument('--images', type=str, default='./examples/data/', help='需要进行检测的图片文件夹') - parser.add_argument('--img-size', type=int, default=416, help='输入分辨率大小') - parser.add_argument('--fourcc', type=str, default='mp4v', help='fourcc output video codec (verify ffmpeg support)') - parser.add_argument('--output', type=str, default='output', help='检测后的图片或视频保存的路径') - parser.add_argument('--half', default=False, help='是否采用半精度FP16进行推理') - parser.add_argument('--webcam', default=True, help='是否使用摄像头进行检测') - return parser.parse_args() - - -def plot_one_box(x, img, color=None, label=None, line_thickness=None): - # Plots one bounding box on image img - tl = line_thickness or round(0.002 * (img.shape[0] + img.shape[1]) / 2) + 1 # line thickness - color = color or [random.randint(0, 255) for _ in range(3)] - c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3])) # - clb = c1[0], c2[1] - cv2.rectangle(img, c1, c2, color, thickness=tl) - if label: - tf = max(tl - 1, 1) # font thickness - t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0] - #c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3 - #cv2.rectangle(img, c1, c2, color, -1) # filled - #cv2.putText(img, label, (c1[0], c1[1] - 2), 0, tl / 3, [225, 255, 255], thickness=tf, lineType=cv2.LINE_AA) - clb2 = clb[0] + t_size[0], clb[1] + t_size[1] + 3 - #c1 = c2[0] - t_size[0], c2[1] + t_size[1] + 3 - cv2.rectangle(img, clb, clb2, color, -1) # filled - cv2.putText(img, label, (clb[0], clb2[1] - 2), 0, tl / 3, [225, 255, 255], thickness=tf, lineType=cv2.LINE_AA) - - -class LoadWebcam: # for inference - def __init__(self, img_size=416, half=False, det_type='ctdet'): - self.img_size = img_size - self.half = half # half precision fp16 images - self.det_type = det_type - pipe = 0 # local camera - - self.cap = cv2.VideoCapture(pipe) # video capture object - self.mode = 'webcam' - - def __iter__(self): - self.count = -1 - return self - - def __next__(self): - self.count += 1 - if cv2.waitKey(1) == 27: # esc to quit - cv2.destroyAllWindows() - raise StopIteration - - # Read image - ret_val, img0 = self.cap.read() - assert ret_val, 'Webcam Error' - img_path = 'webcam_%g.jpg' % self.count - img0 = cv2.flip(img0, 1) # flip left-right - print('webcam %g: ' % self.count, end='') - - return img_path, img0, None - - - def __len__(self): - return 0 - - -class LoadImages: - def __init__(self, path, img_size=416, half=False): - raise NotImplementedError - - -def detect(cfg, - images=None, # input folder - output='output', # output folder - fourcc='mp4v', # video codec - img_size=416, - save_txt=False, - save_images=True): - - # Initialize - device = 'cpu' # cpu or gpu - torch.backends.cudnn.benchmark = False # set False for reproducible results - if os.path.exists(output): - shutil.rmtree(output) # remove previous result - os.makedirs(output) - - mtcnn = MTCNN( - image_size=224, - min_face_size=40, - # device=torch.device('cuda:0' if torch.cuda.is_available() else 'cpu') - device=torch.device('cpu')) - - model = build_model(cfg) - # USE CPU - if device == 'cpu': - model.load_state_dict(torch.load(cfg.TEST.model_load_path, map_location='cpu')) - else: - model.load_state_dict(torch.load(cfg.TEST.model_load_path)) - - - # Eval mode - model.to(device).eval() - - # Half precision - args.half = args.half and device.type != 'cpu' # half precision only supported on CUDA - if args.half: - model.half() - - - # Set Dataloader - vid_path, vid_writer = None, None - if args.webcam: - save_images = False - dataloader = LoadWebcam(img_size=img_size, half=args.half) - else: - dataloader = LoadImages(images, img_size=img_size, half=args.half) - - classes = ['happy', 'anger', 'sad', 'neutral', 'disgust', 'surprised'] # class list - - colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(classes))] # random color for each class - - aug = fer_test_aug(cfg.DATA.input_size) - - # Run inference - t0 = time.time() - for i, (path, img, vid_cap) in enumerate(dataloader): - t = time.time() - save_path = str(Path(output) / Path(path).name) - # Get detections and align - - if img.dtype != 'uint8': # check whether image or not - raise RuntimeError('dtype of numpy array is not uint8!!! check it !!!') - - bboxs, scores, landmarks = mtcnn.detect(img, landmarks=True) - - cls_faces = [] - if bboxs is not None: - landmarks = landmarks.tolist() - for face_id, bbox in enumerate(bboxs): - ori_landmark = landmarks[face_id] - # embed() - ori_landmark.append([bbox[0], bbox[1]]) - ori_landmark.append([bbox[2], bbox[3]]) - - alignedImg = FaceAlign(img, ori_landmark, 255, use_bbox=True) - - alignedImg = cv2.cvtColor(alignedImg, cv2.COLOR_BGR2RGB) - alignedImg = aug(image=alignedImg)['image'] - alignedImg = alignedImg.transpose((2, 0, 1)) - alignedImg = torch.from_numpy(alignedImg) - alignedImg = alignedImg.unsqueeze(0) - - pred_loggits = model(alignedImg) - pred_loggits = pred_loggits.softmax(dim=-1) - cls = np.argmax(pred_loggits) - cls_faces.append(cls) - print(classes[int(cls)]) - - for face_id, bbox in enumerate(bboxs): - plot_one_box(bbox, img, label=classes[cls_faces[face_id]], color=colors[int(cls_faces[face_id])]) - - if args.webcam: # Show live webcam - cv2.imshow("fer", img) - - -if __name__ == "__main__": - - setup_seed(666) - cfg = get_fer_cfg_defaults() - args = get_args() - if args.config.endswith('\r'): - args.config = args.config[:-1] - print('using config: ',args.config.strip()) - cfg.merge_from_file(args.config) - print(cfg) - - with torch.no_grad(): - detect(cfg, - images=args.images, - img_size=args.img_size, - fourcc=args.fourcc, - output=args.output) - diff --git a/fer_pytorch/face_align/face_align.py~ b/fer_pytorch/face_align/face_align.py~ deleted file mode 100644 index 6f366a9..0000000 --- a/fer_pytorch/face_align/face_align.py~ +++ /dev/null @@ -1,59 +0,0 @@ -import os, sys -# sys.path.remove('/opt/ros/kinetic/lib/python2.7/dist-packages') -import cv2 -import numpy -from IPython import embed -import time - -# expect 5 landmark, top-left and bottom-right coord on imgSize [112,96] -ImgSize = [112, 96] -Coord7Point = [[30.2946, 51.6963], - [65.5318, 51.6963], - [48.0252, 71.7366], - [33.5493, 92.3655], - [62.7299, 92.3655], - [ 0, 0], - [ 112, 96]] - - -def OutSizeRescale(OutSize = 255): - global ImgSize - global Coord7Point - sw, sh = float(OutSize) / ImgSize[0], float(OutSize) / ImgSize[1] - Coord7Point = [ [sh * x[0],sw * x[1]] for x in Coord7Point] - ImgSize = [OutSize, OutSize] - - -def TransformationFromPoints(p, q): - - pad = numpy.ones(5) - p = numpy.insert(p, 2, values=pad, axis=1) - q = numpy.insert(q, 2, values=pad, axis=1) - - # 最小二乘 - # M1 = numpy.linalg.inv(p.T*p) - M1 = numpy.linalg.pinv(p.T*p) # pseudo inverse - M2 = p.T*q - M = M1*M2 - return M.T - - -def WarpIm(img_im, orgi_landmarks, tar_landmarks): - pts1 = numpy.float64(numpy.matrix([[point[0], point[1]] for point in orgi_landmarks])) - pts2 = numpy.float64(numpy.matrix([[point[0], point[1]] for point in tar_landmarks])) - M = TransformationFromPoints(pts1, pts2) - dst = cv2.warpAffine(img_im, M[:2], (img_im.shape[1], img_im.shape[0])) - return dst - - -def FaceAlign(img_im, face_landmarks, outimgsize, use_bbox=True): - if not use_bbox: - global Coord7Point - Coord7Point = Coord7Point[:5] - assert len(face_landmarks) == len(Coord7Point) - OutSizeRescale(255) - dst = warp_im(img_im, face_landmarks, Coord7Point) - crop_im = dst[0:ImgSize[0], 0:ImgSize[1]] - return crop_im - - diff --git a/fer_pytorch/utils/face_align.py~ b/fer_pytorch/utils/face_align.py~ deleted file mode 100644 index 80b40d9..0000000 --- a/fer_pytorch/utils/face_align.py~ +++ /dev/null @@ -1,68 +0,0 @@ -import os, sys -# sys.path.remove('/opt/ros/kinetic/lib/python2.7/dist-packages') -import cv2 -import torch -from fer_pytorch.face_detect import MTCNN -import numpy -from IPython import embed -import pandas as pd -# from dataset.base import FER_DatasetTest -# from dataset.fer2013 import FER2013 -import h5py -import torch.utils.data as data -from IPython import embed -from PIL import Image - -import time - -# expect 5 landmark, top-left and bottom-right coord on imgSize [112,96] -ImgSize = [112, 96] -Coord7Point = [[30.2946, 51.6963], - [65.5318, 51.6963], - [48.0252, 71.7366], - [33.5493, 92.3655], - [62.7299, 92.3655], - [ 0, 0], - [ 112, 96]] - - -def OutSizeRescale(OutSize = 255): - global ImgSize - global Coord7Point - sw, sh = float(OutSize) / ImgSize[0], float(OutSize) / ImgSize[1] - Coord7Point = [ [sh * x[0],sw * x[1]] for x in Coord7Point] - ImgSize = [OutSize, OutSize] - - -def TransformationFromPoints(p, q): - - pad = numpy.ones(5) - p = numpy.insert(p, 2, values=pad, axis=1) - q = numpy.insert(q, 2, values=pad, axis=1) - - # 最小二乘 - # M1 = numpy.linalg.inv(p.T*p) - M1 = numpy.linalg.pinv(p.T*p) # pseudo inverse - M2 = p.T*q - M = M1*M2 - return M.T - - -def WarpIm(img_im, orgi_landmarks, tar_landmarks): - pts1 = numpy.float64(numpy.matrix([[point[0], point[1]] for point in orgi_landmarks])) - pts2 = numpy.float64(numpy.matrix([[point[0], point[1]] for point in tar_landmarks])) - M = TransformationFromPoints(pts1, pts2) - dst = cv2.warpAffine(img_im, M[:2], (img_im.shape[1], img_im.shape[0])) - return dst - - -def FaceAlign(img_im, face_landmarks, outimgsize, use_bbox=True): - if not use_bbox: - global Coord7Point - Coord7Point = Coord7Point[:5] - OutSizeRescale(255) - dst = warp_im(img_im, face_landmarks, Coord7Point) - crop_im = dst[0:ImgSize[0], 0:ImgSize[1]] - return crop_im - - diff --git a/fer_pytorch/utils/get_landmark_from_dataset.py~ b/fer_pytorch/utils/get_landmark_from_dataset.py~ deleted file mode 100644 index b6d2da2..0000000 --- a/fer_pytorch/utils/get_landmark_from_dataset.py~ +++ /dev/null @@ -1,103 +0,0 @@ -import os, sys -# sys.path.remove('/opt/ros/kinetic/lib/python2.7/dist-packages') -import cv2 -import torch -from fer_pytorch.face_detect import MTCNN -import numpy -from IPython import embed -import pandas as pd -# from dataset.base import FER_DatasetTest -# from dataset.fer2013 import FER2013 -import h5py -import torch.utils.data as data -from IPython import embed -from PIL import Image - -import time - -class affine_img_with_five_landmark: - imgSize = [112, 96] - coord5point = [[30.2946, 51.6963], - [65.5318, 51.6963], - [48.0252, 71.7366], - [33.5493, 92.3655], - [62.7299, 92.3655]] - - def __init__(self): - print("cuda : {}".format(torch.cuda.is_available())) - self.OutSizeMapping() - self.mtcnn = MTCNN( - image_size = 224, - min_face_size = 40, -# device=torch.device('cuda:0' if torch.cuda.is_available() else 'cpu') - device=torch.device( 'cpu') - ) - - def OutSizeMapping(self, OutSize = 255): - sw, sh = float(OutSize) / self.imgSize[0], float(OutSize) / self.imgSize[1] - self.newcoord5point = [ [sh * x[0],sw * x[1]] for x in self.coord5point] - self.imgSize = [OutSize, OutSize] - - def transformation_from_points(self, p, q): - - pad = numpy.ones(5) - p = numpy.insert(p, 2, values=pad, axis=1) - q = numpy.insert(q, 2, values=pad, axis=1) - - # 最小二乘 - M1 = numpy.linalg.inv(p.T*p) - M2 = p.T*q - M = M1*M2 - return M.T - - def warp_im(self, img_im, orgi_landmarks,tar_landmarks): - pts1 = numpy.float64(numpy.matrix([[point[0], point[1]] for point in orgi_landmarks])) - pts2 = numpy.float64(numpy.matrix([[point[0], point[1]] for point in tar_landmarks])) - M = self.transformation_from_points(pts1, pts2) - dst = cv2.warpAffine(img_im, M[:2], (img_im.shape[1], img_im.shape[0])) - return dst - - def FaceAlign(self, img_im, face_landmarks): - dst = self.warp_im(img_im, face_landmarks, self.newcoord5point) - crop_im = dst[0:self.imgSize[0], 0:self.imgSize[1]] - return crop_im - - def face_affiner(self, img, bbox_label=None): - imgSize = [112, 96] - h, w = img.shape[:2] - - if img.dtype != 'uint8': # check whether image or not - raise RuntimeError('dtype of numpy array is not uint8!!! check it !!!') - - bboxs, scores, landmarks = self.mtcnn.detect(img, landmarks=True) - if bboxs is not None and len(bboxs)!=0: - if bbox_label is not None: - y0, x0, y1, x1 = bbox_label - best_area = 100000000000000 - best_id = -1 - for i, box in enumerate(bboxs): - # 计算外接矩 - area = (max(x1, box[2]) - min(x0, box[0]))*(max(y1, box[3]) - min(y0, box[1])) - if area < best_area: - best_area = area - best_id = i - else: - best_id = 0 - else: # bboxs 为空,检测失败 - - return img - - if best_id == -1: - embed() - img = self.FaceAlign(img,landmarks[best_id].tolist()) - return img - - -if __name__ =='__main__': - - affiner = affine_img_with_five_landmark() - img_path = '/media/yz/新加卷/CZCV/FaceExpRecog/project/fer_pytorch/exmaples/data/1.jpg' - img = cv2.imread(img_path) - #embed() - dst=affiner.face_affiner(img) - cv2.imwrite('/media/yz/新加卷/CZCV/FaceExpRecog/project/fer_pytorch/exmaples/data/1_affine.jpg', dst)