first phrase

This commit is contained in:
邓智航
2025-12-23 00:02:36 +08:00
commit 6e882d2aa4
14 changed files with 721 additions and 0 deletions

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

173
reproject/analyzer.py Normal file
View File

@@ -0,0 +1,173 @@
import cv2
import mediapipe as mp
import time
import numpy as np
from collections import deque
from geometry_utils import calculate_ear, calculate_mar_simple, estimate_head_pose, LEFT_EYE, RIGHT_EYE
from face_library import FaceLibrary
try:
from new_emotion_test import analyze_emotion_with_hsemotion
HAS_EMOTION_MODULE = True
except ImportError:
print("⚠️ 未找到 new_emotion_test.py情绪功能将不可用")
HAS_EMOTION_MODULE = False
class MonitorSystem:
    """Per-frame face monitor: landmarks, eye/mouth ratios, pose, identity, emotion.

    Every frame is run through MediaPipe Face Mesh. Identity and emotion
    recognition are re-evaluated only at fixed intervals, and their last
    results are reused for the frames in between.
    """

    def __init__(self, face_db):
        """Build the detector, the face library and all tracking state.

        Args:
            face_db: Enrollment mapping handed unchanged to ``FaceLibrary``.
        """
        # MediaPipe Face Mesh, restricted to one face per frame.
        self.mp_face_mesh = mp.solutions.face_mesh
        self.face_mesh = self.mp_face_mesh.FaceMesh(
            max_num_faces=1,
            refine_landmarks=True,
            min_detection_confidence=0.5,
            min_tracking_confidence=0.5,
        )

        # Known-face library used for identification.
        self.face_lib = FaceLibrary(face_db)

        # Info record of the most recently identified person (None until a match).
        self.current_user = None

        # Rate limiting, in seconds, for the two expensive recognisers.
        self.last_identity_check_time = 0
        self.IDENTITY_CHECK_INTERVAL = 2.0
        self.last_emotion_check_time = 0
        self.EMOTION_CHECK_INTERVAL = 3.0

        # Sliding windows backing the EAR / MAR moving averages.
        self.HISTORY_LEN = 5
        self.ear_history = deque(maxlen=self.HISTORY_LEN)
        self.mar_history = deque(maxlen=self.HISTORY_LEN)

        # Last emotion result, reported until the next emotion check.
        self.cached_emotion = {
            "label": "detecting...",
            "va": (0.0, 0.0),
        }

    def _get_smoothed_value(self, history, current_val):
        """Append ``current_val`` to ``history`` and return the window mean.

        If the window is still empty after the append (a zero-length deque),
        ``current_val`` is returned unchanged.
        """
        history.append(current_val)
        count = len(history)
        return sum(history) / count if count else current_val

    @staticmethod
    def _landmark_bounds(landmarks, w, h):
        """Return the unclamped pixel box ``(left, top, right, bottom)`` of all landmarks."""
        xs = [lm.x for lm in landmarks]
        ys = [lm.y for lm in landmarks]
        return int(min(xs) * w), int(min(ys) * h), int(max(xs) * w), int(max(ys) * h)

    def _refresh_emotion(self, frame, landmarks, w, h):
        """Crop the face out of the BGR ``frame`` and update ``cached_emotion``."""
        x_min, y_min, x_max, y_max = self._landmark_bounds(landmarks, w, h)

        # Grow the box by 20% of its size on every side, clamped to the frame.
        pad_x = int((x_max - x_min) * 0.2)
        pad_y = int((y_max - y_min) * 0.2)
        x_min = max(0, x_min - pad_x)
        x_max = min(w, x_max + pad_x)
        y_min = max(0, y_min - pad_y)
        y_max = min(h, y_max + pad_y)

        face_crop = frame[y_min:y_max, x_min:x_max]
        if face_crop.size > 0:
            try:
                emo_results = analyze_emotion_with_hsemotion(face_crop)
                if emo_results:
                    top_res = emo_results[0]
                    self.cached_emotion["label"] = top_res.get("emotion", "unknown")
                    self.cached_emotion["va"] = top_res.get("vaVal", (0.0, 0.0))
            except Exception as e:
                # Best effort: keep the previously cached emotion on failure.
                print(f"情绪分析出错: {e}")

    def process_frame(self, frame):
        """Analyse one BGR frame and return the result dictionary.

        Returns:
            dict with keys ``has_face``, ``ear``, ``mar``, ``pose``,
            ``identity``, ``emotion_label`` and ``emotion_va``. When no face
            is found, the numeric fields keep their zero defaults and the
            smoothing windows are reset.
        """
        h, w = frame.shape[:2]
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = self.face_mesh.process(rgb_frame)

        analysis_data = {
            "has_face": False,
            "ear": 0.0,
            "mar": 0.0,
            "pose": (0, 0, 0),
            "identity": self.current_user,
            "emotion_label": self.cached_emotion["label"],
            "emotion_va": self.cached_emotion["va"],
        }

        detected_faces = results.multi_face_landmarks
        if not detected_faces:
            # Face lost: drop stale samples so they do not bias the next average.
            self.ear_history.clear()
            self.mar_history.clear()
            return analysis_data

        analysis_data["has_face"] = True
        landmarks = detected_faces[0].landmark

        # Eye aspect ratio, averaged over both eyes.
        left_ear = calculate_ear([landmarks[i] for i in LEFT_EYE], w, h)
        right_ear = calculate_ear([landmarks[i] for i in RIGHT_EYE], w, h)
        raw_ear = (left_ear + right_ear) / 2.0

        # Mouth aspect ratio from four inner-lip landmarks (13/14 vertical, 78/308 horizontal).
        def to_pixels(index):
            point = landmarks[index]
            return np.array([point.x * w, point.y * h])

        raw_mar = calculate_mar_simple(
            to_pixels(13), to_pixels(14), to_pixels(78), to_pixels(308)
        )

        # Moving-average smoothing.
        smoothed_ear = self._get_smoothed_value(self.ear_history, raw_ear)
        smoothed_mar = self._get_smoothed_value(self.mar_history, raw_mar)

        # Head pose.
        pitch, yaw, roll = estimate_head_pose(landmarks, w, h)

        analysis_data["ear"] = round(smoothed_ear, 4)
        analysis_data["mar"] = round(smoothed_mar, 4)
        analysis_data["pose"] = (int(pitch), int(yaw), int(roll))

        now = time.time()

        # --- Identity recognition (rate limited) ---
        if now - self.last_identity_check_time > self.IDENTITY_CHECK_INTERVAL:
            left, top, right, bottom = self._landmark_bounds(landmarks, w, h)
            pad = 20
            # Box order is (top, right, bottom, left), as FaceLibrary.identify expects.
            face_loc = (
                max(0, top - pad),
                min(w, right + pad),
                min(h, bottom + pad),
                max(0, left - pad),
            )
            match_result = self.face_lib.identify(rgb_frame, face_location=face_loc)
            # NOTE(review): a failed match keeps the previous user, so an
            # unknown face may be reported under the last recognised identity.
            # Confirm this stickiness is intended.
            if match_result:
                self.current_user = match_result["info"]
            self.last_identity_check_time = now
        analysis_data["identity"] = self.current_user

        # --- Emotion recognition (rate limited, optional module) ---
        if HAS_EMOTION_MODULE and (
            now - self.last_emotion_check_time > self.EMOTION_CHECK_INTERVAL
        ):
            self._refresh_emotion(frame, landmarks, w, h)
            self.last_emotion_check_time = now
        analysis_data["emotion_label"] = self.cached_emotion["label"]
        analysis_data["emotion_va"] = self.cached_emotion["va"]

        return analysis_data

59
reproject/face_library.py Normal file
View File

@@ -0,0 +1,59 @@
import face_recognition
import numpy as np
class FaceLibrary:
    """In-memory gallery of known face encodings with nearest-match lookup."""

    def __init__(self, face_db):
        """Load every enrollable face from ``face_db``.

        Args:
            face_db: dict such as ``{"Name": {"image-path": "x.jpg", "age": 20}, ...}``.
        """
        # Three parallel lists: index i describes the same person in each.
        self.known_encodings = []
        self.known_names = []
        self.known_infos = []
        self._load_database(face_db)

    def _load_database(self, face_db):
        """Encode each reference image; entries that fail are reported and skipped."""
        print("正在加载人脸底库...")
        for name, info in face_db.items():
            try:
                picture = face_recognition.load_image_file(info["image-path"])
                found = face_recognition.face_encodings(picture)
                if not found:
                    print(f"⚠️ 无法提取特征: {name}")
                    continue
                # Only the first face found in the reference image is enrolled.
                self.known_encodings.append(found[0])
                self.known_names.append(name)
                self.known_infos.append(info)
                print(f"✅ 已加载: {name}")
            except Exception as e:
                print(f"❌ 加载失败 {name}: {e}")

    def identify(self, frame_rgb, face_location=None, tolerance=0.5):
        """Match the face in ``frame_rgb`` against the gallery.

        Args:
            frame_rgb: RGB image.
            face_location: ``(top, right, bottom, left)`` box, or None to
                search the whole image.
            tolerance: Largest encoding distance still accepted as a match.

        Returns:
            ``{"name", "info", "distance"}`` for the closest known face within
            ``tolerance``; None when the gallery is empty, no face can be
            encoded, nothing is close enough, or an error occurs.
        """
        if not self.known_encodings:
            return None

        locations = None
        if face_location:
            locations = [face_location]

        try:
            candidates = face_recognition.face_encodings(
                frame_rgb, known_face_locations=locations
            )
            if not candidates:
                return None

            distances = face_recognition.face_distance(
                self.known_encodings, candidates[0]
            )
            best = np.argmin(distances)
            if distances[best] > tolerance:
                return None
            return {
                "name": self.known_names[best],
                "info": self.known_infos[best],
                "distance": distances[best],
            }
        except Exception as e:
            print(f"识别出错: {e}")
        return None

View File

@@ -0,0 +1,94 @@
import numpy as np
import cv2
# Left-eye landmark indices. calculate_ear reads positions 0 and 3 of the
# selected points as the horizontal pair and (1, 5) / (2, 4) as the vertical pairs.
LEFT_EYE = [33, 160, 158, 133, 153, 144]
# Right-eye landmark indices, in the same positional layout as LEFT_EYE.
RIGHT_EYE = [362, 385, 387, 263, 373, 380]
# Lip landmark indices (inner ring).
LIPS = [78, 95, 88, 178, 87, 14, 317, 402, 318, 324, 308, 415, 310, 311, 312, 13]
def _euclidean_distance(point1, point2):
    """Return the Euclidean (L2) distance between two NumPy points."""
    offset = point1 - point2
    return np.linalg.norm(offset)
def calculate_ear(landmarks, width, height):
    """Compute the eye aspect ratio (EAR) of one eye.

    Args:
        landmarks: Six landmark objects exposing normalised ``x`` / ``y``
            attributes. Positions 0 and 3 are the horizontal pair; (1, 5) and
            (2, 4) are the two vertical pairs.
        width: Image width in pixels, used to scale ``x``.
        height: Image height in pixels, used to scale ``y``.

    Returns:
        ``(v1 + v2) / (2 * h)`` as a float, or ``0.0`` when the horizontal
        distance is zero (degenerate landmarks), which would otherwise give
        inf/nan and poison any downstream averaging.
    """
    # Convert normalised coordinates to pixel coordinates.
    points = np.array([(p.x * width, p.y * height) for p in landmarks], dtype=float)

    # Two vertical eyelid distances.
    vertical_a = np.linalg.norm(points[1] - points[5])
    vertical_b = np.linalg.norm(points[2] - points[4])
    # Horizontal distance between the eye corners.
    horizontal = np.linalg.norm(points[0] - points[3])

    if horizontal == 0:
        return 0.0
    return float((vertical_a + vertical_b) / (2.0 * horizontal))
def calculate_mar(landmarks, width, height):
    """Placeholder for a landmark-list based mouth aspect ratio (MAR).

    The computation is unfinished: the landmarks are converted to pixel
    coordinates, but no ratio is derived and the function returns None.
    """
    # TODO: implement the ratio; until then use calculate_mar_simple.
    np.array([(lm.x * width, lm.y * height) for lm in landmarks])
    return None
def calculate_mar_simple(top, bottom, left, right):
    """Compute a four-point mouth aspect ratio (opening height / mouth width).

    Args:
        top, bottom: Coordinates of the upper and lower lip points.
        left, right: Coordinates of the two mouth corners.
            Each may be a NumPy array or any plain numeric sequence.

    Returns:
        ``|top - bottom| / |left - right|`` as a float, or ``0.0`` when the
        mouth width is zero (degenerate landmarks) instead of inf/nan.
    """
    opening = np.linalg.norm(np.subtract(top, bottom))
    span = np.linalg.norm(np.subtract(left, right))
    if span == 0:
        return 0.0
    return float(opening / span)
# NOTE: the estimate_head_pose function below is the replacement implementation for geometry_utils.py.
def estimate_head_pose(landmarks, width, height):
    """Estimate head pose (pitch, yaw, roll) in degrees from face landmarks.

    Six landmarks are matched against a generic 3D face model and solved with
    PnP. The camera is approximated: focal length equals the image width, the
    principal point is the image centre, and there is no lens distortion.

    Args:
        landmarks: Indexable landmark collection whose items expose normalised
            ``x`` / ``y`` attributes. Indices 1, 152, 33, 263, 61 and 291 are read.
        width: Image width in pixels.
        height: Image height in pixels.

    Returns:
        ``(pitch, yaw, roll)`` in degrees, or ``(0.0, 0.0, 0.0)`` when the PnP
        solver reports failure.
    """
    # Generic 3D face model points.
    model_points = np.array([
        (0.0, 0.0, 0.0),           # Nose tip
        (0.0, -330.0, -65.0),      # Chin
        (-225.0, 170.0, -135.0),   # Left eye left corner
        (225.0, 170.0, -135.0),    # Right eye right corner
        (-150.0, -150.0, -125.0),  # Left mouth corner
        (150.0, -150.0, -125.0),   # Right mouth corner
    ])

    # Landmark indices matching the model points above, in the same order.
    idx_list = [1, 152, 33, 263, 61, 291]
    image_points = np.array(
        [(landmarks[idx].x * width, landmarks[idx].y * height) for idx in idx_list],
        dtype="double",
    )

    focal_length = width
    center = (width / 2, height / 2)
    camera_matrix = np.array(
        [[focal_length, 0, center[0]],
         [0, focal_length, center[1]],
         [0, 0, 1]], dtype="double"
    )
    dist_coeffs = np.zeros((4, 1))

    # Solve PnP for the model-to-camera rotation.
    success, rotation_vector, _translation_vector = cv2.solvePnP(
        model_points, image_points, camera_matrix, dist_coeffs
    )
    if not success:
        # The rotation is meaningless when the solver fails; report a neutral pose.
        return 0.0, 0.0, 0.0

    rmat, _ = cv2.Rodrigues(rotation_vector)
    angles = cv2.RQDecomp3x3(rmat)[0]

    def wrap_degrees(angle):
        """Shift an angle outside [-180, 180] back by one full turn."""
        if angle < -180:
            angle += 360
        if angle > 180:
            angle -= 360
        return angle

    pitch = wrap_degrees(angles[0])
    # Re-reference pitch so that a raw value near +/-180 maps near 0.
    # Presumably needed because the model's Y axis points up while image Y
    # points down (TODO confirm).
    pitch = 180 - pitch if pitch > 0 else -pitch - 180
    yaw = wrap_degrees(angles[1])
    roll = wrap_degrees(angles[2])

    return pitch, yaw, roll

286
reproject/main.py Normal file
View File

@@ -0,0 +1,286 @@
import cv2
import threading
import time
import queue
import socket
import json
import urllib.request
import struct
from analyzer import MonitorSystem
# TCP endpoint that video_stream_thread connects to for streaming JPEG frames.
SERVER_HOST = '10.128.50.6'
SERVER_PORT = 65432
# HTTP endpoint that data_upload_thread POSTs JSON state reports to.
API_URL = "http://10.128.50.6:5000/api/states"
# Identifier of this client: sent as "id" in every report and in each video packet header.
CAMERA_ID = "23373333"
# Face enrollment database handed to MonitorSystem (key -> person info incl. reference image).
BASIC_FACE_DB = {
    "Zhihang": {"name": "Zhihang Deng", "age": 20, "image-path": "zhihang.png"},
    "Yaoyu": {"name": "Yaoyu Zhang", "age": 20, "image-path": "yaoyu.jpg"},
}
# Bounded hand-off queues between the worker threads.
frame_queue = queue.Queue(maxsize=2)   # capture_thread -> analysis_thread
video_queue = queue.Queue(maxsize=1)   # capture_thread -> video_stream_thread
data_queue = queue.Queue(maxsize=10)   # analysis_thread -> data_upload_thread
# Set to ask every worker thread to stop.
stop_event = threading.Event()
def _offer_latest(target_queue, item):
    """Enqueue ``item`` without blocking, evicting the oldest entry when full."""
    try:
        target_queue.put_nowait(item)
        return
    except queue.Full:
        pass
    try:
        target_queue.get_nowait()
    except queue.Empty:
        pass  # A consumer emptied the queue in the meantime.
    try:
        target_queue.put_nowait(item)
    except queue.Full:
        pass  # Lost a race for the free slot; dropping one frame is acceptable.


def capture_thread():
    """Capture thread: read camera frames and distribute them to the consumers.

    Every frame goes to ``frame_queue`` for analysis. Every second frame also
    goes to ``video_queue`` for streaming, at a reduced rate. Both queues keep
    only the newest frames. The loop ends when ``stop_event`` is set or the
    camera stops delivering frames.
    """
    cap = cv2.VideoCapture(0)
    cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
    cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)

    print("[Capture] 摄像头启动...")

    frame_count = 0
    try:
        while not stop_event.is_set():
            ret, frame = cap.read()
            if not ret:
                # Camera read failed; end the capture loop.
                break

            _offer_latest(frame_queue, frame)

            if frame_count % 2 == 0:
                # The analysis thread draws its overlay on the frame in place,
                # so the streaming thread gets its own copy.
                _offer_latest(video_queue, frame.copy())

            frame_count += 1
            time.sleep(0.01)
    finally:
        # Release the camera even if the loop exits through an exception.
        cap.release()
        print("[Capture] 线程结束")
def analysis_thread():
    """Core analysis thread: analyse frames, queue reports, show the preview.

    1. A report is queued for every frame, even with no face present (the
       data fields are then empty strings).
    2. When ``data_queue`` is full the oldest report is dropped, so the queue
       always holds the most recent state.

    Pressing ``q`` in the preview window sets ``stop_event``.
    """
    monitor = MonitorSystem(BASIC_FACE_DB)
    print("[Analysis] 分析系统启动...")

    try:
        while not stop_event.is_set():
            try:
                frame = frame_queue.get(timeout=1)
            except queue.Empty:
                continue

            # Core analysis.
            result = monitor.process_frame(frame)

            payload = {
                "id": CAMERA_ID,
                "time": time.strftime("%Y-%m-%d %H:%M:%S"),
                "name": "",
                "ear": "",
                "mar": "",
                "pose": "",
                "emo_label": "",
                "emo_va": "",
            }

            if result["has_face"]:
                identity = result["identity"]
                payload.update({
                    # A face without a recognised identity is reported as "Unknown".
                    "name": identity["name"] if identity else "Unknown",
                    "ear": result["ear"],
                    "mar": result["mar"],
                    "pose": result["pose"],
                    "emo_label": result["emotion_label"],
                    "emo_va": result["emotion_va"],
                })

            # Never block the analysis loop: evict the oldest report when full.
            try:
                data_queue.put_nowait(payload)
            except queue.Full:
                try:
                    data_queue.get_nowait()
                except queue.Empty:
                    pass
                try:
                    data_queue.put_nowait(payload)
                except queue.Full:
                    pass

            # NOTE(review): HighGUI calls from a non-main thread may not work
            # on every platform. Confirm on the deployment target.
            draw_debug_info(frame, result)
            cv2.imshow("Monitor Client", frame)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                stop_event.set()
    finally:
        # Close the preview window even when analysis raises.
        cv2.destroyAllWindows()
        print("[Analysis] 分析线程结束")
def video_stream_thread():
    """Sender thread: stream down-scaled JPEG frames to the server over TCP.

    Each packet is ``len(camera_id)`` (4 bytes, big-endian) + ``camera_id`` +
    ``len(jpeg)`` (4 bytes, big-endian) + ``jpeg``. After a connection failure
    the thread waits three seconds before reconnecting. That wait ends
    immediately when ``stop_event`` is set, so shutdown is not delayed.
    """
    print(f"[Video] 准备连接服务器 {SERVER_HOST}:{SERVER_PORT} ...")

    # The camera-id part of the header never changes, so build it once.
    camera_id_bytes = CAMERA_ID.encode('utf-8')
    id_header = len(camera_id_bytes).to_bytes(4, 'big') + camera_id_bytes
    jpeg_params = [cv2.IMWRITE_JPEG_QUALITY, 50]

    while not stop_event.is_set():
        try:
            with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
                # Disable Nagle's algorithm so frames are sent without batching delay.
                s.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
                s.connect((SERVER_HOST, SERVER_PORT))
                print("[Video] 已连接")

                while not stop_event.is_set():
                    try:
                        frame = video_queue.get(timeout=1)
                    except queue.Empty:
                        continue

                    try:
                        small_frame = cv2.resize(frame, (320, 240))
                        ok, buffer = cv2.imencode('.jpg', small_frame, jpeg_params)
                        if not ok:
                            continue

                        frame_bytes = buffer.tobytes()
                        packet = b"".join(
                            (id_header, len(frame_bytes).to_bytes(4, 'big'), frame_bytes)
                        )
                        s.sendall(packet)
                    except Exception as e:
                        # Leave the send loop; the outer loop opens a new connection.
                        print(f"[Video] 发送断开: {e}")
                        break
        except Exception as e:
            print(f"[Video] 重连中... {e}")
            # Interruptible back-off instead of an unconditional sleep.
            stop_event.wait(3)
    print("[Video] 线程结束")
def data_upload_thread():
    """Upload thread running in periodic burst mode.

    Every 30 seconds the thread discards all queued reports, then sends up to
    five fresh ones, one second apart, as JSON POST requests to ``API_URL``.
    Because the backlog is drained first, each report in a burst was produced
    after the burst began.
    """
    print("[Data] 数据上报线程启动 (周期模式: 休眠30s -> 连发5次)")

    LONG_SLEEP = 30
    BURST_COUNT = 5
    BURST_GAP = 1

    # Phase 1: long sleep. wait() returns True as soon as a stop is requested.
    while not stop_event.wait(LONG_SLEEP):
        # Phase 2: burst.
        print(f"[Data] 开始上报周期 (连发 {BURST_COUNT} 次)...")

        # Throw away the stale backlog.
        while True:
            try:
                data_queue.get_nowait()
            except queue.Empty:
                break

        time.sleep(0.1)

        for i in range(BURST_COUNT):
            if stop_event.is_set():
                break

            try:
                data = data_queue.get(timeout=1.5)
            except queue.Empty:
                print(f"[Data] 队列为空,跳过第 {i+1} 次发送")
            else:
                try:
                    req = urllib.request.Request(
                        url=API_URL,
                        data=json.dumps(data).encode('utf-8'),
                        headers={'Content-Type': 'application/json'},
                        method='POST'
                    )
                    with urllib.request.urlopen(req, timeout=2) as resp:
                        pass

                    # Log the successful upload.
                    name_info = data['name'] if data['name'] else "NO-FACE"
                    print(f"[Data Upload {i+1}/{BURST_COUNT}] {name_info} | Time:{data['time']}")
                except Exception as e:
                    print(f"[Data] Upload Error: {e}")

            # Gap between sends, skipped after the last one; ends early on stop.
            if i < BURST_COUNT - 1:
                stop_event.wait(BURST_GAP)

    print("[Data] 数据上报线程结束")
def draw_debug_info(frame, result):
    """Draw the live analysis values onto ``frame`` in place."""
    font = cv2.FONT_HERSHEY_SIMPLEX

    if not result["has_face"]:
        cv2.putText(frame, "NO FACE", (50, 50), font, 1, (0, 0, 255), 2)
        return

    # Identity: one colour for a recognised user, another for an unknown face.
    identity = result["identity"]
    if identity:
        id_text, color = identity["name"], (0, 255, 0)
    else:
        id_text, color = "Unknown", (0, 255, 255)
    cv2.putText(frame, f"User: {id_text}", (20, 40), font, 0.7, color, 2)

    # Eye / mouth ratios.
    cv2.putText(frame, f"EAR: {result['ear']}", (20, 70), font, 0.6, (255, 255, 0), 1)
    cv2.putText(frame, f"MAR: {result['mar']}", (20, 95), font, 0.6, (255, 255, 0), 1)
    if result['ear'] < 0.15:
        cv2.putText(frame, "EYE CLOSE", (250, 250), font, 1, (0, 0, 255), 2)

    # Head pose.
    p, y, r = result["pose"]
    cv2.putText(frame, f"Pose: P{p} Y{y} R{r}", (20, 120), font, 0.6, (0, 255, 255), 1)

    # Emotion, rendered like: Emo: happy (-0.50, 0.20)
    emo = result.get("emotion_label", "N/A")
    va = result.get("emotion_va", (0, 0))
    emo_text = f"Emo: {emo} ({va[0]:.2f}, {va[1]:.2f})"
    cv2.putText(frame, emo_text, (20, 145), font, 0.6, (0, 165, 255), 1)
if __name__ == "__main__":
    # One daemon thread per pipeline stage, all created before any is started.
    workers = [
        threading.Thread(target=entry, daemon=True)
        for entry in (
            capture_thread,
            analysis_thread,
            video_stream_thread,
            data_upload_thread,
        )
    ]
    for worker in workers:
        worker.start()

    # Idle until a worker requests shutdown or the user presses Ctrl-C.
    try:
        while not stop_event.is_set():
            time.sleep(1)
    except KeyboardInterrupt:
        print("停止程序...")
        stop_event.set()

    for worker in workers:
        worker.join()

View File

@@ -0,0 +1,109 @@
import cv2
import numpy as np
import math
from hsemotion_onnx.facial_emotions import HSEmotionRecognizer
# (valence, arousal) anchor assigned to each normalised emotion key; weighted by
# the per-emotion scores in EmotionAnalyzer.calculate_va_score.
EMOTION_VA_MAP = {
    'happy': (0.85, 0.60),
    'sad': (-0.75, -0.60),
    'angry': (-0.70, 0.80),
    'fear': (-0.65, 0.75),
    'surprise': (0.20, 0.85),
    'disgust': (-0.80, 0.40),
    'neutral': (0.00, 0.00),
    'contempt': (-0.60, 0.50),
}
# Maps lower-cased raw emotion labels to the keys used by EMOTION_VA_MAP.
EMOTION_HANDLE = {
    'happiness': 'happy',
    'sadness': 'sad',
    'anger': 'angry',
    'fear': 'fear',
    'surprise': 'surprise',
    'disgust': 'disgust',
    'neutral': 'neutral',
    'contempt': 'contempt',
}
def get_fine_grained_emotion(valence, arousal):
    """Map a (valence, arousal) point to one of twelve sector labels or "neutral".

    Points closer than 0.25 to the origin are "neutral". Otherwise the polar
    angle ``atan2(arousal, valence)`` in degrees selects a 30-degree sector.
    """
    radius = math.sqrt(valence**2 + arousal**2)
    angle = math.degrees(math.atan2(arousal, valence))

    if radius < 0.25:
        return "neutral"

    if angle >= 0:
        # Upper half-plane: sectors are closed at their upper edge, except
        # that exactly 90 degrees belongs to the second quadrant.
        if angle <= 30:
            return "pleased"
        if angle <= 60:
            return "happy"
        if angle < 90:
            return "excited"
        if angle <= 120:
            return "annoying"
        if angle <= 150:
            return "angry"
        if angle <= 180:
            return "nervous"
        return "neutral"

    # Lower half-plane: sectors are closed at their lower edge.
    if angle < -180:
        return "neutral"
    if angle < -150:
        return "sad"
    if angle < -120:
        return "bored"
    if angle < -90:
        return "sleepy"
    if angle < -60:
        return "calm"
    if angle < -30:
        return "peaceful"
    if angle < 0:
        return "relaxed"

    # Reached only when the angle is not comparable (NaN).
    return "neutral"
class EmotionAnalyzer:
    """Facial-emotion analyser built on the HSEmotion ONNX recogniser."""

    def __init__(self):
        """Load the ``enet_b0_8_best_vgaf`` HSEmotion model."""
        print("正在加载 HSEmotion-ONNX 模型...")
        self.fer = HSEmotionRecognizer(model_name='enet_b0_8_best_vgaf')
        print("HSEmotion-ONNX 模型加载完成")

    def calculate_va_score(self, emotion_prob):
        """Return the probability-weighted mean (valence, arousal).

        Args:
            emotion_prob: Mapping of emotion label to probability. Labels are
                lower-cased and normalised through ``EMOTION_HANDLE``; labels
                without an entry in ``EMOTION_VA_MAP`` are ignored.

        Returns:
            ``(valence, arousal)``, normalised by the total probability of the
            recognised labels so that ignored labels do not pull the result
            towards the origin. ``(0.0, 0.0)`` when no recognised label
            carries probability mass.
        """
        valence_sum = 0.0
        arousal_sum = 0.0
        total_prob = 0.0

        for emotion, prob in emotion_prob.items():
            key = EMOTION_HANDLE.get(emotion.lower(), emotion.lower())
            if key not in EMOTION_VA_MAP:
                continue
            v, a = EMOTION_VA_MAP[key]
            valence_sum += v * prob
            arousal_sum += a * prob
            total_prob += prob

        if total_prob <= 0:
            return 0.0, 0.0
        # When the recognised probabilities already sum to 1 this is a no-op.
        return valence_sum / total_prob, arousal_sum / total_prob

    def analyze(self, face_img_bgr):
        """Analyse one cropped face image.

        Args:
            face_img_bgr: BGR face crop; ``None`` or an empty image gives ``[]``.

        Returns:
            A one-element list holding a dict with keys ``box``, ``vaVal``,
            ``probabilities``, ``dominant_emotion`` and ``emotion``, or an
            empty list when there is nothing to analyse.
        """
        if face_img_bgr is None or face_img_bgr.size == 0:
            return []

        face_img_rgb = cv2.cvtColor(face_img_bgr, cv2.COLOR_BGR2RGB)

        # predict_emotions returns the dominant label and the per-class score array.
        emotion_raw, scores = self.fer.predict_emotions(face_img_rgb, logits=False)

        probabilities = {}
        for idx, score in enumerate(scores):
            raw_label = self.fer.idx_to_class[idx].lower()
            probabilities[EMOTION_HANDLE.get(raw_label, raw_label)] = float(score)

        valence, arousal = self.calculate_va_score(probabilities)
        dominant = emotion_raw.lower()

        result = {
            "box": {},
            "vaVal": (round(valence, 4), round(arousal, 4)),
            "probabilities": probabilities,
            "dominant_emotion": EMOTION_HANDLE.get(dominant, dominant),
            "emotion": get_fine_grained_emotion(valence, arousal),
        }
        return [result]
# Shared analyser, created once when this module is imported.
analyzer_instance = EmotionAnalyzer()


def analyze_emotion_with_hsemotion(face_crop_bgr):
    """Analyse a BGR face crop with the shared analyser and return its result list."""
    detections = analyzer_instance.analyze(face_crop_bgr)
    return detections

BIN
reproject/requirement.txt Normal file

Binary file not shown.

BIN
reproject/va_reference.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 73 KiB

BIN
reproject/yaoyu.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 126 KiB

BIN
reproject/zhihang.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 171 KiB