本文目录导读:

我来介绍几种Python实现手势识别的方法,从简单到复杂:
使用OpenCV + MediaPipe(推荐方案)
这是最简单且准确率高的方法,适合新手:
import cv2
import mediapipe as mp
class HandGestureRecognizer:
    """Webcam hand-gesture recognizer built on MediaPipe Hands and OpenCV.

    Each detected hand is reduced to five 0/1 finger flags in the order
    thumb, index, middle, ring, pinky, and the flag pattern is matched
    against a small table of known gestures.
    """

    # cv2.putText draws with Hershey fonts, which cannot render CJK text or
    # emoji, so the video overlay uses these ASCII captions while
    # recognize_gesture() keeps returning the original labels.
    _DISPLAY_LABELS = {
        "剪刀✌️": "Scissors",
        "拳头✊": "Fist",
        "手掌🖐️": "Palm",
        "点赞👍": "Thumbs up",
        "其他手势": "Other",
    }

    def __init__(self):
        """Create the MediaPipe Hands detector and the drawing helper."""
        self.mp_hands = mp.solutions.hands
        self.hands = self.mp_hands.Hands(
            static_image_mode=False,
            max_num_hands=2,
            min_detection_confidence=0.5,
            min_tracking_confidence=0.5,
        )
        self.mp_draw = mp.solutions.drawing_utils

    def recognize_gesture(self, hand_landmarks):
        """Classify one hand from its landmarks.

        Args:
            hand_landmarks: Object exposing ``.landmark``, an indexable
                sequence of at least 21 points with ``.x`` and ``.y``.

        Returns:
            One of "剪刀✌️", "拳头✊", "手掌🖐️", "点赞👍" or "其他手势".
        """
        points = hand_landmarks.landmark

        # Thumb: compared along x (landmark 4 against landmark 3).
        # NOTE(review): an x-only test presumably holds for one hand
        # orientation and is inverted for the other - confirm against the
        # mirrored camera image.
        fingers = [1 if points[4].x < points[3].x else 0]

        # Other four fingers: landmarks 8, 12, 16, 20 are each compared with
        # the landmark two indices earlier; a smaller y counts as extended.
        for tip in range(8, 21, 4):
            fingers.append(1 if points[tip].y < points[tip - 2].y else 0)

        # Map the finger pattern to a gesture label.
        if fingers == [0, 1, 1, 0, 0]:
            return "剪刀✌️"
        if fingers == [0, 0, 0, 0, 0]:
            return "拳头✊"
        if fingers == [1, 1, 1, 1, 1]:
            return "手掌🖐️"
        # Thumbs-up means only the thumb is extended; the pattern used to be
        # [0, 1, 0, 0, 0], which is a raised index finger.
        if fingers == [1, 0, 0, 0, 0]:
            return "点赞👍"
        return "其他手势"

    def run_camera(self):
        """Run live recognition on camera 0 until a read fails or 'q' is pressed."""
        cap = cv2.VideoCapture(0)
        try:
            while True:
                success, img = cap.read()
                if not success:
                    break

                # Mirror the frame, then convert BGR to RGB for MediaPipe.
                img = cv2.flip(img, 1)
                img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

                results = self.hands.process(img_rgb)
                if results.multi_hand_landmarks:
                    for row, hand_landmarks in enumerate(
                        results.multi_hand_landmarks
                    ):
                        # Draw the hand landmarks and their connections.
                        self.mp_draw.draw_landmarks(
                            img, hand_landmarks, self.mp_hands.HAND_CONNECTIONS
                        )
                        gesture = self.recognize_gesture(hand_landmarks)
                        caption = self._DISPLAY_LABELS.get(gesture, gesture)
                        # One text row per hand so two captions do not
                        # overwrite each other.
                        cv2.putText(
                            img, caption, (10, 50 + 40 * row),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2,
                        )

                cv2.imshow("Gesture Recognition", img)
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
        finally:
            # Release the camera and windows even if a frame raised.
            cap.release()
            cv2.destroyAllWindows()
# Usage example: start live recognition when run as a script.
if __name__ == "__main__":
    recognizer = HandGestureRecognizer()
    recognizer.run_camera()
使用OpenCV + 图像处理(基于轮廓)
更基础的方法,除了OpenCV和NumPy之外不依赖其他第三方库:
import cv2
import numpy as np
class SimpleGestureRecognizer:
    """Count raised fingers from a skin-colour mask and convexity defects."""

    def __init__(self):
        """Open camera 0; it is released when run() finishes."""
        self.cap = cv2.VideoCapture(0)

    def process_image(self, img):
        """Return a binary skin mask for a BGR frame.

        The frame is converted to YCrCb, thresholded with fixed bounds and
        cleaned up with a morphological open followed by a close.
        """
        ycrcb = cv2.cvtColor(img, cv2.COLOR_BGR2YCrCb)

        # Skin-colour range in YCrCb; explicit uint8 matches the image dtype.
        lower = np.array([0, 133, 77], dtype=np.uint8)
        upper = np.array([255, 173, 127], dtype=np.uint8)
        mask = cv2.inRange(ycrcb, lower, upper)

        # Morphological clean-up with a 5x5 kernel.
        kernel = np.ones((5, 5), np.uint8)
        mask = cv2.morphologyEx(mask, cv2.MORPH_OPEN, kernel)
        mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel)
        return mask

    def find_contours(self, mask):
        """Return the largest external contour if its area exceeds 3000, else None."""
        contours, _ = cv2.findContours(
            mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
        )
        if not contours:
            return None
        largest = max(contours, key=cv2.contourArea)
        # Minimum-area threshold.
        if cv2.contourArea(largest) > 3000:
            return largest
        return None

    @staticmethod
    def _defect_angle(start, end, far):
        """Return the angle in radians at ``far`` in the triangle start-far-end.

        A degenerate triangle (``far`` coincides with ``start`` or ``end``)
        yields pi, so it never passes the "< 90 degrees" test. The same
        input used to give NaN plus a divide-by-zero warning.
        """
        start = np.asarray(start, dtype=float)
        end = np.asarray(end, dtype=float)
        far = np.asarray(far, dtype=float)
        a = np.linalg.norm(start - far)
        b = np.linalg.norm(end - far)
        c = np.linalg.norm(start - end)
        if a == 0 or b == 0:
            return float(np.pi)
        # Law of cosines; clip guards against rounding just outside [-1, 1].
        cosine = np.clip((a ** 2 + b ** 2 - c ** 2) / (2 * a * b), -1.0, 1.0)
        return float(np.arccos(cosine))

    def count_fingers(self, contour):
        """Estimate the number of raised fingers for a hand contour.

        Returns 0 when no convexity defects are found; otherwise the number
        of defects whose angle is below 90 degrees, plus one.
        """
        hull = cv2.convexHull(contour, returnPoints=False)
        defects = cv2.convexityDefects(contour, hull)
        if defects is None:
            return 0

        gaps = 0
        for i in range(defects.shape[0]):
            start_idx, end_idx, far_idx, _depth = defects[i, 0]
            angle = self._defect_angle(
                contour[start_idx][0], contour[end_idx][0], contour[far_idx][0]
            )
            if angle < np.pi / 2:  # angle below 90 degrees
                gaps += 1
        return gaps + 1  # finger count = qualifying defects + 1

    def run(self):
        """Run live finger counting until a read fails or 'q' is pressed."""
        try:
            while True:
                ret, frame = self.cap.read()
                if not ret:
                    break
                frame = cv2.flip(frame, 1)

                mask = self.process_image(frame)
                contour = self.find_contours(mask)
                if contour is not None:
                    # Outline the hand and show the finger count.
                    cv2.drawContours(frame, [contour], -1, (0, 255, 0), 2)
                    finger_count = self.count_fingers(contour)
                    cv2.putText(
                        frame, f"Fingers: {finger_count}", (10, 50),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2,
                    )

                cv2.imshow("Gesture Recognition", frame)
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
        finally:
            # Release the camera and windows even if a frame raised.
            self.cap.release()
            cv2.destroyAllWindows()
# Usage example: start the contour-based recognizer when run as a script.
if __name__ == "__main__":
    recognizer = SimpleGestureRecognizer()
    recognizer.run()
使用TensorFlow训练手势分类模型
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import cv2
import numpy as np
import os
class GestureModelTrainer:
    """Build and train a small CNN gesture classifier on 64x64 RGB images."""

    def __init__(self, num_classes=5):
        """Store the number of output classes of the softmax layer."""
        self.num_classes = num_classes
        # Keras History from the most recent train() call, or None.
        self.history = None

    def create_model(self):
        """Create and compile the CNN.

        Returns:
            A compiled ``keras.Sequential`` with three convolution layers,
            a 64-unit dense layer with dropout, and a softmax output of
            ``self.num_classes`` units.
        """
        model = keras.Sequential([
            layers.Conv2D(32, (3, 3), activation='relu', input_shape=(64, 64, 3)),
            layers.MaxPooling2D((2, 2)),
            layers.Conv2D(64, (3, 3), activation='relu'),
            layers.MaxPooling2D((2, 2)),
            layers.Conv2D(64, (3, 3), activation='relu'),
            layers.Flatten(),
            layers.Dense(64, activation='relu'),
            layers.Dropout(0.5),
            layers.Dense(self.num_classes, activation='softmax'),
        ])
        model.compile(
            optimizer='adam',
            loss='categorical_crossentropy',
            metrics=['accuracy'],
        )
        return model

    def train(self, data_dir, epochs=10, model_path='gesture_model.h5'):
        """Train on an image directory and save the model.

        Args:
            data_dir: Directory passed to ``flow_from_directory``; 20% of
                the images are held out for validation.
            epochs: Number of training epochs.
            model_path: Where the trained model is saved.

        Returns:
            The trained model. The fit history is kept in ``self.history``.
        """
        # NOTE(review): ImageDataGenerator is a legacy Keras API - confirm
        # it is available in the installed TensorFlow/Keras version.
        image = keras.preprocessing.image

        # Augmentation is applied to the training subset only.
        train_data_gen = image.ImageDataGenerator(
            rescale=1. / 255,
            rotation_range=20,
            width_shift_range=0.2,
            height_shift_range=0.2,
            horizontal_flip=True,
            validation_split=0.2,
        )
        # Validation images are only rescaled, so validation metrics are
        # measured on undistorted data. The same validation_split keeps the
        # two subsets disjoint.
        val_data_gen = image.ImageDataGenerator(
            rescale=1. / 255,
            validation_split=0.2,
        )

        flow_options = dict(
            target_size=(64, 64),
            batch_size=32,
            class_mode='categorical',
        )
        train_gen = train_data_gen.flow_from_directory(
            data_dir, subset='training', **flow_options
        )
        val_gen = val_data_gen.flow_from_directory(
            data_dir, subset='validation', **flow_options
        )

        # Create and train the model.
        model = self.create_model()
        self.history = model.fit(
            train_gen,
            validation_data=val_gen,
            epochs=epochs,
        )

        # Save the trained model.
        model.save(model_path)
        return model
class GesturePredictor:
    """Classify gestures in images or a live camera feed with a saved model."""

    # flow_from_directory assigns class indices in alphanumeric order of the
    # sub-directory names, so the default names are sorted to match.
    # NOTE(review): assumes the training folders are named exactly like
    # this - otherwise pass gesture_names explicitly.
    DEFAULT_GESTURE_NAMES = ('fist', 'ok', 'palm', 'peace', 'thumb')

    def __init__(self, model_path='gesture_model.h5', gesture_names=None):
        """Load the model and set the class-index-to-name table.

        Args:
            model_path: Path of the saved Keras model.
            gesture_names: Optional sequence of names indexed by class id;
                defaults to ``DEFAULT_GESTURE_NAMES``.
        """
        self.model = keras.models.load_model(model_path)
        if gesture_names is None:
            gesture_names = self.DEFAULT_GESTURE_NAMES
        self.gesture_names = list(gesture_names)

    def predict(self, image):
        """Predict the gesture in a single BGR image.

        Returns:
            A ``(gesture_name, confidence)`` tuple for the top class.
        """
        # OpenCV frames are BGR; convert to RGB before inference.
        # NOTE(review): assumes the model was trained on RGB images, as
        # Keras directory loaders produce - confirm for other models.
        img = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        img = cv2.resize(img, (64, 64))
        img = img / 255.0
        img = np.expand_dims(img, axis=0)

        # verbose=0 keeps the per-frame progress bar off the console.
        prediction = self.model.predict(img, verbose=0)
        class_idx = int(np.argmax(prediction[0]))
        confidence = float(prediction[0][class_idx])
        return self.gesture_names[class_idx], confidence

    def run_realtime(self):
        """Run live prediction on camera 0 until a read fails or 'q' is pressed."""
        cap = cv2.VideoCapture(0)
        try:
            while True:
                ret, frame = cap.read()
                if not ret:
                    break
                frame = cv2.flip(frame, 1)

                gesture, confidence = self.predict(frame)
                cv2.putText(
                    frame, f"{gesture} ({confidence:.2f})", (10, 50),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2,
                )
                cv2.imshow("Gesture Recognition", frame)
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
        finally:
            # Release the camera and windows even if a frame raised.
            cap.release()
            cv2.destroyAllWindows()
安装依赖
# 基础依赖
pip install opencv-python mediapipe numpy
# 如果使用TensorFlow
pip install tensorflow
# 其他有用的库(注意:PyPI上的包名是scikit-learn,而不是sklearn)
pip install scikit-learn matplotlib
实用技巧和注意事项
手势识别优化建议:
class GestureOptimizer:
    """Stateless image helpers for gesture-recognition pipelines."""

    @staticmethod
    def preprocess_image(img):
        """Denoise a BGR image and boost its contrast.

        Applies a 5x5 Gaussian blur, then histogram-equalizes channel 0 of
        the LAB conversion and converts back to BGR.
        """
        # Gaussian blur to reduce noise.
        blurred = cv2.GaussianBlur(img, (5, 5), 0)
        # Equalize channel 0 in LAB space to enhance contrast.
        lab = cv2.cvtColor(blurred, cv2.COLOR_BGR2LAB)
        lab[:, :, 0] = cv2.equalizeHist(lab[:, :, 0])
        return cv2.cvtColor(lab, cv2.COLOR_LAB2BGR)

    @staticmethod
    def detect_hand_region(img, min_area=0):
        """Locate the hand region (ROI) with a simple HSV skin threshold.

        Args:
            img: BGR image.
            min_area: Smallest contour area accepted as a hand. The default
                of 0 accepts any contour.

        Returns:
            ``(x, y, w, h)`` of the bounding box around the largest
            skin-coloured contour, or None if there is no contour or the
            largest one is smaller than ``min_area``.
        """
        # A detector such as YOLO could be used instead; this is plain
        # skin-colour thresholding.
        lower_skin = np.array([0, 20, 70], dtype=np.uint8)
        upper_skin = np.array([20, 255, 255], dtype=np.uint8)
        hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
        mask = cv2.inRange(hsv, lower_skin, upper_skin)

        # Keep only the largest contour.
        contours, _ = cv2.findContours(
            mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
        )
        if not contours:
            return None
        largest = max(contours, key=cv2.contourArea)
        if cv2.contourArea(largest) < min_area:
            return None
        x, y, w, h = cv2.boundingRect(largest)
        return (x, y, w, h)
完整示例:综合应用
import cv2
import mediapipe as mp
import numpy as np
from collections import deque
class AdvancedGestureRecognizer:
    """MediaPipe-based recognizer with gesture labels and a volume read-out."""

    def __init__(self):
        """Create the MediaPipe Hands detector and initialise gesture state."""
        self.mp_hands = mp.solutions.hands
        self.hands = self.mp_hands.Hands(
            static_image_mode=False,
            max_num_hands=2,
            min_detection_confidence=0.7,
            min_tracking_confidence=0.5,
        )
        self.mp_draw = mp.solutions.drawing_utils
        # Gesture state. These attributes are initialised here but not read
        # or updated by any method of this class.
        self.gesture_history = deque(maxlen=5)
        self.current_gesture = None
        self.gesture_count = 0

    def get_gesture(self, landmarks, img_shape):
        """Classify a hand from its landmark sequence.

        Args:
            landmarks: Indexable sequence of at least 21 points with ``.y``.
            img_shape: Shape of the source image. It is not used by the
                classification and is kept for interface compatibility.

        Returns:
            "fist", "palm", "peace", "point", "thumb_up" or "unknown".
        """
        finger_tips = [4, 8, 12, 16, 20]  # fingertips
        finger_mcp = [2, 5, 9, 13, 17]    # knuckle landmarks compared against
        # A finger counts as extended when its tip has a smaller y than its
        # reference knuckle.
        fingers = [
            1 if landmarks[tip].y < landmarks[base].y else 0
            for tip, base in zip(finger_tips, finger_mcp)
        ]
        extended = sum(fingers)

        if extended == 0:
            return "fist"
        if extended == 5:
            return "palm"
        # Peace sign is index + middle. The previous pattern [1, 1, 0, 0, 0]
        # was thumb + index. The thumb flag is ignored here because it comes
        # from a y-only comparison.
        if fingers[1:] == [1, 1, 0, 0]:
            return "peace"
        if fingers == [0, 1, 0, 0, 0]:
            return "point"
        if fingers == [1, 0, 0, 0, 0]:
            return "thumb_up"
        return "unknown"

    def recognize_volume(self, landmarks, img_shape):
        """Map the thumb-to-index fingertip distance to a volume (example).

        Args:
            landmarks: Indexable sequence with ``.x``/``.y`` points at
                indices 4 and 8.
            img_shape: Shape of the source image. It is not used and is
                kept for interface compatibility.

        Returns:
            A float in [0, 100]: distances of 0.05 and below map to 0, and
            0.3 and above map to 100.
        """
        thumb_tip = landmarks[4]
        index_tip = landmarks[8]
        distance = np.hypot(
            thumb_tip.x - index_tip.x,
            thumb_tip.y - index_tip.y,
        )
        # np.interp already clamps to the end values outside [0.05, 0.3],
        # so no extra min/max is needed.
        return float(np.interp(distance, [0.05, 0.3], [0, 100]))

    def run_with_feedback(self):
        """Run live recognition with overlays until a read fails or 'q' is pressed."""
        cap = cv2.VideoCapture(0)
        try:
            while True:
                success, img = cap.read()
                if not success:
                    break
                img = cv2.flip(img, 1)
                img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
                results = self.hands.process(img_rgb)

                if results.multi_hand_landmarks:
                    for row, hand_landmarks in enumerate(
                        results.multi_hand_landmarks
                    ):
                        # Draw the hand landmarks and their connections.
                        self.mp_draw.draw_landmarks(
                            img, hand_landmarks, self.mp_hands.HAND_CONNECTIONS
                        )
                        gesture = self.get_gesture(
                            hand_landmarks.landmark, img.shape
                        )
                        volume = self.recognize_volume(
                            hand_landmarks.landmark, img.shape
                        )

                        # Each hand gets its own 100-pixel band so two
                        # hands do not draw over one another.
                        top = 100 * row
                        cv2.putText(
                            img, f"Gesture: {gesture}", (10, top + 50),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2,
                        )
                        # Volume bar, two pixels per percent.
                        cv2.rectangle(
                            img, (50, top + 100),
                            (50 + int(volume * 2), top + 130),
                            (0, 255, 0), -1,
                        )
                        cv2.putText(
                            img, f"Volume: {int(volume)}%", (50, top + 90),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1,
                        )

                cv2.imshow("Advanced Gesture Recognition", img)
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
        finally:
            # Release the camera and windows even if a frame raised.
            cap.release()
            cv2.destroyAllWindows()
# Run the advanced gesture recognizer when executed as a script.
if __name__ == "__main__":
    recognizer = AdvancedGestureRecognizer()
    recognizer.run_with_feedback()
这些方法分别适用于不同的场景:
- MediaPipe方案:最简单,准确率高,适合快速开发
- OpenCV传统方法:除OpenCV和NumPy外不需要额外库,但准确率较低
- 深度学习方案:需要训练数据,但可定制性强
- 综合应用:结合多种技术实现更复杂的功能
建议从MediaPipe方案开始,它最容易上手且效果最好。