4.20 Team Development Report

1. Problems Encountered Yesterday

Yesterday's development went fairly smoothly. Given my current personal ability, that detection box is the best I can draw for now, so I plan to stop tweaking it.

2. Today's Task

Today I will start writing the concentration algorithm.

3. Code
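
The code below appears to be concatenated from several source files (each new block of imports marks a file boundary): a pose-keypoint action classifier, two face-landmark aligners (PFLD and a MobileNet-SE variant), a wrapper around the Silent-Face anti-spoofing models, and a TorchScript YOLOv5 person detector. These are the perception modules the concentration algorithm is expected to build on.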

# ==== Pose-based action classifier ====
import torch


class ClassroomActionClassifier:
    def __init__(self, weights, device):
        self.device = device
        self.model = torch.jit.load(weights).to(device)
        # Warm up with a dummy batch: 28 features = 14 keypoints x 2 coordinates.
        _ = self.model(torch.zeros(1, 28).to(self.device))

    @staticmethod
    def preprocess(keypoints):
        # Normalize each person's keypoints into their own bounding box,
        # rescale from [0, 1] to [-1, 1], and flatten to (N, 28).
        x_min = torch.min(keypoints[:, :, 0], dim=1).values
        y_min = torch.min(keypoints[:, :, 1], dim=1).values
        x_max = torch.max(keypoints[:, :, 0], dim=1).values
        y_max = torch.max(keypoints[:, :, 1], dim=1).values
        x1_y1 = torch.stack([x_min, y_min], dim=1).unsqueeze(1)
        width = torch.stack([x_max - x_min, y_max - y_min], dim=1).unsqueeze(1)
        scaled_keypoints = (keypoints - x1_y1) / width
        scaled_keypoints = (scaled_keypoints - 0.5) / 0.5
        return scaled_keypoints.flatten(start_dim=1)

    def classify(self, keypoints):
        return self.model(self.preprocess(keypoints)).detach().cpu()
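
A quick smoke test for the classifier, as a minimal sketch: the weights path is hypothetical, and the 14-keypoint layout is only inferred from the 1x28 warm-up tensor above.

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
classifier = ClassroomActionClassifier('weights/action_classifier.torchscript.pt', device)  # hypothetical path

# Two fake persons, 14 (x, y) keypoints each, in pixel coordinates.
keypoints = torch.rand(2, 14, 2).to(device) * 640
logits = classifier.classify(keypoints)  # (2, num_classes) tensor on the CPU
print(logits.argmax(dim=1))              # predicted action id per person
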
# ==== Face landmark aligners ====
import numpy as np
import torch


class PFLDFaceAligner:

    def __init__(self, weights, device):
        self.device = device
        self.model = torch.jit.load(weights).to(device)
        # Warm up: PFLD expects 112x112 face crops.
        _ = self.model(torch.zeros(1, 3, 112, 112).to(self.device))

    @staticmethod
    def preprocess(faces):
        # NHWC uint8 batch -> NCHW float batch in [0, 1].
        return torch.from_numpy(faces.transpose((0, 3, 1, 2)) / 255.0)

    def align(self, faces):
        result = self.model(self.preprocess(faces).to(device=self.device, dtype=torch.float32)).detach().cpu()
        return result.view(-1, 68, 2)


class MobileNetSEFaceAligner:
    # ImageNet channel statistics, shaped to broadcast over NHWC batches.
    mean = np.asarray([0.485, 0.456, 0.406]).reshape(1, 1, 1, 3)
    std = np.asarray([0.229, 0.224, 0.225]).reshape(1, 1, 1, 3)

    def __init__(self, weights, device):
        self.device = device
        self.model = torch.jit.load(weights).to(device)
        # Warm up: this aligner expects 56x56 face crops.
        _ = self.model(torch.zeros(1, 3, 56, 56).to(self.device))

    def preprocess(self, faces):
        # Scale to [0, 1], normalize with ImageNet statistics, then NHWC -> NCHW.
        faces = (faces / 255 - self.mean) / self.std
        return torch.from_numpy(faces.transpose((0, 3, 1, 2)))

    def align(self, faces):
        result = self.model(self.preprocess(faces).to(device=self.device, dtype=torch.float32)).detach().cpu()
        return result.view(-1, 68, 2)
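
Both aligners consume a batch of face crops as an NHWC NumPy array and return 68 landmarks per face. A minimal sketch, assuming a hypothetical weights path; the 56x56 crop size comes from the warm-up tensor above (PFLD would take 112x112 crops instead).

device = torch.device('cpu')
aligner = MobileNetSEFaceAligner('weights/mobilenet_se_aligner.pt', device)  # hypothetical path

faces = np.random.randint(0, 256, size=(4, 56, 56, 3), dtype=np.uint8)  # four fake face crops
landmarks = aligner.align(faces)  # (4, 68, 2) landmark tensor
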
# ==== Silent-Face anti-spoofing wrapper ====
import os

import numpy as np

from silent_face.src.anti_spoof_predictor import AntiSpoofPredictor
from silent_face.src.utility import parse_model_name
from utils.img_cropper import CropImage

# Indices of the models (by directory listing order) that should be loaded.
wanted_model_index = [0]


class SilentFaceDetector:
    def __init__(self, device_id='cpu', model_dir='weights/anti_spoof_models'):
        self.models = []
        self.params = []
        for i, model_name in enumerate(os.listdir(model_dir)):
            if i not in wanted_model_index:
                continue
            self.models.append(AntiSpoofPredictor(device_id, os.path.join(model_dir, model_name)))
            self.params.append(parse_model_name(model_name))

    def detect(self, frame, face_location):
        # Convert (x1, y1, x2, y2) to (x, y, w, h) without mutating the caller's array.
        face_location = face_location.copy()
        face_location[2:] = face_location[2:] - face_location[:2]
        prediction = np.zeros((1, 3))
        # Sum the predictions over every loaded model.
        for model, (h_input, w_input, model_type, scale) in zip(self.models, self.params):
            param = {
                "org_img": frame,
                "bbox": face_location,
                "scale": scale,
                "out_w": w_input,
                "out_h": h_input,
                "crop": True,
            }
            if scale is None:
                param["crop"] = False
            img = CropImage.crop(**param)
            prediction += model.predict(img)

        # Return the winning label and its score averaged over the models.
        label = np.argmax(prediction)
        return label, prediction[0][label] / len(self.models)
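
A usage sketch for the anti-spoofing wrapper; the frame and bounding box below are placeholders. In the Silent-Face reference code, label 1 corresponds to a real face, but that mapping should be verified against the models actually loaded.

detector = SilentFaceDetector('cpu', 'weights/anti_spoof_models')

frame = np.zeros((480, 640, 3), dtype=np.uint8)  # placeholder BGR frame
face_location = np.array([100, 80, 260, 300])    # x1, y1, x2, y2 from a face detector
label, score = detector.detect(frame, face_location)
print(label, score)
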
# ==== YOLOv5 person detector ====
import cv2
import numpy as np
import torch
from torchvision import transforms

from utils.general import non_max_suppression


def letterbox(img, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32):
    # Resize and pad image while meeting stride-multiple constraints
    shape = img.shape[:2]  # current shape [height, width]
    if isinstance(new_shape, int):
        new_shape = (new_shape, new_shape)

    # Scale ratio (new / old)
    r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
    if not scaleup:  # only scale down, do not scale up (for better test mAP)
        r = min(r, 1.0)

    # Compute padding
    ratio = r, r  # width, height ratios
    new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
    dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]  # wh padding
    if auto:  # minimum rectangle
        dw, dh = np.mod(dw, stride), np.mod(dh, stride)  # wh padding
    elif scaleFill:  # stretch
        dw, dh = 0.0, 0.0
        new_unpad = (new_shape[1], new_shape[0])
        ratio = new_shape[1] / shape[1], new_shape[0] / shape[0]  # width, height ratios

    dw /= 2  # divide padding into 2 sides
    dh /= 2

    if shape[::-1] != new_unpad:  # resize
        img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)
    top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
    left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
    img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # add border
    return img, ratio, (dw, dh)
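
A quick check of the padding behaviour, as a sketch: with auto=False the output is exactly new_shape, while with auto=True the padding is reduced modulo stride and the output can stay smaller.

img = np.zeros((480, 640, 3), dtype=np.uint8)

padded, ratio, (dw, dh) = letterbox(img, new_shape=640, auto=False)
print(padded.shape)  # (640, 640, 3): 80 px of gray padding on top and bottom

padded, ratio, (dw, dh) = letterbox(img, new_shape=640, auto=True)
print(padded.shape)  # (480, 640, 3): 160 is a multiple of stride 32, so no padding is added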


class YoloV5Detector:
    def __init__(self, weights, device):
        self.device = device
        self.model = torch.jit.load(weights).to(device)
        self.conf_thres = 0.35
        self.iou_thres = 0.45
        self.agnostic_nms = False
        self.max_det = 1000
        self.classes = [0]  # filter detections to class 0 (person, in COCO-trained YOLOv5)
        self.transformer = transforms.Compose([transforms.ToTensor()])
        # Warm up; the exported model expects 640x480 input.
        _ = self.model(torch.zeros(1, 3, 640, 480).to(self.device))

    def preprocess_img(self, img):
        # BGR -> RGB, then HWC uint8 -> normalized 1x3xHxW float tensor on the target device.
        # (A letterbox-based batch path could be used here instead, for arbitrary input sizes.)
        return self.transformer(img[:, :, ::-1].copy()).unsqueeze(0).to(self.device, dtype=torch.float32)

    def detect(self, img):
        # Preprocess
        img = self.preprocess_img(img)
        # Inference
        pred = self.model(img)[0]
        # NMS
        pred = non_max_suppression(pred, self.conf_thres, self.iou_thres, self.classes, self.agnostic_nms,
                                   max_det=self.max_det)
        # Boxes are returned in the preprocessed image's coordinate space; no
        # scale_coords rescaling back to the original frame is applied here.
        pred = pred[0].detach().cpu()
        return pred
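
An end-to-end sketch with a hypothetical weights path. preprocess_img does no resizing, so frames should already match the 640x480 (height x width) shape used in the warm-up, and the returned boxes live in that same coordinate space.

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
detector = YoloV5Detector('weights/yolov5.torchscript.pt', device)  # hypothetical path

frame = np.zeros((640, 480, 3), dtype=np.uint8)  # placeholder BGR frame, 640 high x 480 wide
detections = detector.detect(frame)  # (num_boxes, 6): x1, y1, x2, y2, confidence, class
for *xyxy, conf, cls in detections:
    print([int(v) for v in xyxy], float(conf), int(cls))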