本文目录导读:

- 使用OpenCV的预训练模型(DNN模块)
- 使用OpenCV自带的Haar Cascade分类器
- 使用TensorFlow Object Detection API
- 综合示例:图像和视频目标检测
- 安装依赖和准备模型
- 关键概念说明
我来为你详细介绍Python实现目标检测的几种主流方法,并提供完整的案例代码。
使用OpenCV的预训练模型(DNN模块)
1 使用YOLOv3进行目标检测
import cv2
import numpy as np
import matplotlib.pyplot as plt
class YOLODetector:
    """YOLO object detector running on OpenCV's DNN module.

    The detector loads a Darknet config/weights pair and a class-name file,
    runs a forward pass on an image and filters the raw predictions with
    non-maximum suppression.
    """

    def __init__(self, config_path, weights_path, classes_path):
        """Load the class names and the network.

        Args:
            config_path: Path to the Darknet ``.cfg`` file.
            weights_path: Path to the matching ``.weights`` file.
            classes_path: Text file holding one class name per line.
        """
        # Explicit encoding so the file is read the same way on every platform.
        with open(classes_path, 'r', encoding='utf-8') as f:
            self.classes = [line.strip() for line in f]
        self.net = cv2.dnn.readNet(weights_path, config_path)
        self.layer_names = self.net.getLayerNames()
        # Depending on the OpenCV release, getUnconnectedOutLayers() yields a
        # flat array or an (N, 1) array; flattening handles both. The ids are
        # 1-based, hence the "- 1".
        out_ids = np.asarray(self.net.getUnconnectedOutLayers()).flatten()
        self.output_layers = [self.layer_names[int(i) - 1] for i in out_ids]
        # One random colour per class, fixed for the lifetime of the detector.
        self.colors = np.random.uniform(0, 255, size=(len(self.classes), 3))

    @staticmethod
    def _decode_outputs(outputs, width, height, confidence_threshold):
        """Convert raw network outputs into pixel boxes, scores and class ids.

        Each output is read as a 2-D array whose rows hold a relative box in
        columns 0-3 (centre x, centre y, width, height) and per-class scores
        from column 5 onwards. Column 4 is not used here (presumably the
        objectness score -- confirm against the model).

        Returns:
            ``(boxes, confidences, class_ids)`` where each box is
            ``[x, y, w, h]`` in pixels with ``(x, y)`` the top-left corner.
        """
        boxes = []
        confidences = []
        class_ids = []
        for output in outputs:
            output = np.asarray(output)
            if output.size == 0:
                continue
            scores = output[:, 5:]
            best_ids = scores.argmax(axis=1)
            best_scores = scores[np.arange(scores.shape[0]), best_ids]
            # Only rows above the threshold are visited in Python.
            for row in np.flatnonzero(best_scores > confidence_threshold):
                rel_cx, rel_cy, rel_w, rel_h = (float(v) for v in output[row, :4])
                center_x = int(rel_cx * width)
                center_y = int(rel_cy * height)
                w = int(rel_w * width)
                h = int(rel_h * height)
                boxes.append([int(center_x - w / 2), int(center_y - h / 2), w, h])
                confidences.append(float(best_scores[row]))
                class_ids.append(int(best_ids[row]))
        return boxes, confidences, class_ids

    def detect(self, image, confidence_threshold=0.5, nms_threshold=0.4):
        """Run the network on ``image`` and return the filtered detections.

        Args:
            image: Image array; the blob is built with ``swapRB=True``, so the
                channels are expected in OpenCV's BGR order.
            confidence_threshold: Minimum class score for a box to be kept.
            nms_threshold: Overlap threshold passed to ``cv2.dnn.NMSBoxes``.

        Returns:
            ``(boxes, confidences, class_ids, indexes)``; ``indexes`` is the
            value returned by ``cv2.dnn.NMSBoxes`` and selects the boxes that
            survived suppression.
        """
        height, width = image.shape[:2]
        blob = cv2.dnn.blobFromImage(
            image, 0.00392, (416, 416), (0, 0, 0), True, crop=False
        )
        self.net.setInput(blob)
        outputs = self.net.forward(self.output_layers)
        boxes, confidences, class_ids = self._decode_outputs(
            outputs, width, height, confidence_threshold
        )
        indexes = cv2.dnn.NMSBoxes(
            boxes, confidences, confidence_threshold, nms_threshold
        )
        return boxes, confidences, class_ids, indexes

    def draw_detections(self, image, boxes, confidences, class_ids, indexes):
        """Return a copy of ``image`` with the selected boxes and labels drawn.

        ``indexes`` may be empty (tuple, list or array) or any array shape;
        it is flattened before use.
        """
        result = image.copy()
        font = cv2.FONT_HERSHEY_PLAIN
        for i in np.asarray(indexes, dtype=int).flatten():
            x, y, w, h = boxes[i]
            class_id = class_ids[i]
            label = str(self.classes[class_id])
            # Plain Python floats are accepted as a colour by every OpenCV build.
            color = tuple(float(c) for c in self.colors[class_id])
            cv2.rectangle(result, (x, y), (x + w, y + h), color, 2)
            text = f"{label}: {confidences[i]:.2f}"
            # Keep the label inside the image when the box touches the top edge.
            cv2.putText(result, text, (x, max(y - 5, 15)), font, 1, color, 2)
        return result
# Usage example
def detect_objects(image_path, config_path, weights_path, classes_path):
    """Detect objects in an image file and show the result with matplotlib.

    Args:
        image_path: Path of the image to analyse.
        config_path: Path to the YOLO ``.cfg`` file.
        weights_path: Path to the YOLO ``.weights`` file.
        classes_path: Path to the class-name file.

    Returns:
        The annotated image in RGB channel order.

    Raises:
        FileNotFoundError: If the image cannot be read.
    """
    detector = YOLODetector(config_path, weights_path, classes_path)

    bgr_image = cv2.imread(image_path)
    if bgr_image is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError(f"Unable to read image: {image_path}")

    # Detect on the BGR image: the detector builds its blob with swapRB=True,
    # so handing it an already-converted RGB image would swap the channels
    # the network sees.
    boxes, confidences, class_ids, indexes = detector.detect(bgr_image)

    # matplotlib expects RGB, so convert only for drawing and display.
    image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)
    result = detector.draw_detections(image, boxes, confidences, class_ids, indexes)

    plt.figure(figsize=(12, 6))
    plt.subplot(1, 2, 1)
    plt.imshow(image)
    plt.title('原始图像')
    plt.axis('off')
    plt.subplot(1, 2, 2)
    plt.imshow(result)
    plt.title(f'检测到 {len(indexes)} 个物体')
    plt.axis('off')
    plt.show()
    return result
# Example helper that fetches the YOLO model files
def download_yolo_files():
    """Download the YOLOv3 config and class list into ``yolo_files/``.

    The weights file is not downloaded; the function prints where to get it.
    """
    import os
    import urllib.request

    # Make sure the target directory exists before writing into it.
    os.makedirs('yolo_files', exist_ok=True)

    # (source URL, local destination) pairs, fetched in order.
    downloads = (
        (
            "https://raw.githubusercontent.com/pjreddie/darknet/master/cfg/yolov3.cfg",
            "yolo_files/yolov3.cfg",
        ),
        (
            "https://raw.githubusercontent.com/pjreddie/darknet/master/data/coco.names",
            "yolo_files/coco.names",
        ),
    )
    for url, destination in downloads:
        urllib.request.urlretrieve(url, destination)

    for message in (
        "请手动下载yolov3.weights文件",
        "下载地址: https://pjreddie.com/media/files/yolov3.weights",
    ):
        print(message)
# Script entry point
if __name__ == "__main__":
    # Run YOLO detection on a sample picture and keep the annotated image.
    result = detect_objects(
        image_path='test_image.jpg',
        config_path='yolo_files/yolov3.cfg',
        weights_path='yolo_files/yolov3.weights',
        classes_path='yolo_files/coco.names',
    )
使用OpenCV自带的Haar Cascade分类器
import cv2
import matplotlib.pyplot as plt
class HaarCascadeDetector:
    """Face, eye and smile detector built on OpenCV's bundled Haar cascades."""

    def __init__(self):
        """Load the three pre-trained cascade classifiers shipped with OpenCV.

        Raises:
            IOError: If one of the cascade files cannot be loaded.
        """
        self.face_cascade = self._load_cascade('haarcascade_frontalface_default.xml')
        self.eye_cascade = self._load_cascade('haarcascade_eye.xml')
        self.smile_cascade = self._load_cascade('haarcascade_smile.xml')

    @staticmethod
    def _load_cascade(filename):
        """Load one cascade from ``cv2.data.haarcascades`` and verify it."""
        cascade = cv2.CascadeClassifier(cv2.data.haarcascades + filename)
        # A missing or broken file yields an empty classifier rather than an
        # exception, so fail here instead of at the first detection call.
        if cascade.empty():
            raise IOError(f"Failed to load Haar cascade: {filename}")
        return cascade

    def detect_faces(self, image):
        """Return the face rectangles ``(x, y, w, h)`` found in a BGR image."""
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        faces = self.face_cascade.detectMultiScale(
            gray,
            scaleFactor=1.1,
            minNeighbors=5,
            minSize=(30, 30)
        )
        return faces

    def detect_eyes_and_smiles(self, image, face_roi):
        """Detect eyes and smiles inside one face rectangle.

        Args:
            image: BGR image the face was found in.
            face_roi: ``(x, y, w, h)`` rectangle of the face.

        Returns:
            ``(eyes, smiles, roi_color)``: rectangles relative to the face
            region, plus the colour crop of that region (a view into
            ``image``, not a copy).
        """
        x, y, w, h = face_roi
        roi_color = image[y:y + h, x:x + w]
        # Convert only the crop; converting the full frame for every face
        # repeats the same work.
        roi_gray = cv2.cvtColor(roi_color, cv2.COLOR_BGR2GRAY)
        eyes = self.eye_cascade.detectMultiScale(roi_gray)
        smiles = self.smile_cascade.detectMultiScale(
            roi_gray,
            scaleFactor=1.8,
            minNeighbors=20
        )
        return eyes, smiles, roi_color

    def draw_detections(self, image):
        """Return an annotated copy of ``image`` and the number of faces.

        Faces are outlined in blue, eyes in green and smiles in red (BGR
        colours). The input image is left untouched.
        """
        result = image.copy()
        faces = self.detect_faces(image)
        for face in faces:
            x, y, w, h = (int(v) for v in face)
            cv2.rectangle(result, (x, y), (x + w, y + h), (255, 0, 0), 2)

            eyes, smiles, _ = self.detect_eyes_and_smiles(image, (x, y, w, h))
            # Draw into the matching region of the *copy*. The crop returned
            # above is a view of the input, so drawing on it would modify the
            # caller's image and leave `result` without eyes and smiles.
            roi_result = result[y:y + h, x:x + w]

            for (ex, ey, ew, eh) in eyes:
                cv2.rectangle(roi_result, (ex, ey), (ex + ew, ey + eh), (0, 255, 0), 2)

            for (sx, sy, sw, sh) in smiles:
                cv2.rectangle(roi_result, (sx, sy), (sx + sw, sy + sh), (0, 0, 255), 2)
                cv2.putText(roi_result, "Smile", (sx, sy - 5),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)
        return result, len(faces)
# Real-time video detection
def real_time_detection():
    """Run face detection on the default camera until 'q' is pressed.

    The loop also ends when no frame can be read. The camera and all OpenCV
    windows are released even if detection raises.
    """
    cap = cv2.VideoCapture(0)
    try:
        detector = HaarCascadeDetector()
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            result, face_count = detector.draw_detections(frame)
            # Overlay the number of faces found in this frame.
            cv2.putText(result, f"Faces: {face_count}", (10, 30),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
            cv2.imshow('Real-time Face Detection', result)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Without this, an exception inside the loop would leave the camera
        # locked and the window open.
        cap.release()
        cv2.destroyAllWindows()
# Usage example
if __name__ == "__main__":
    # Detect faces in a still image.
    image_path = 'test_image.jpg'
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread returns None on failure; stop with a readable message
        # instead of failing later on a None object.
        raise SystemExit(f"Unable to read image: {image_path}")

    detector = HaarCascadeDetector()
    result, face_count = detector.draw_detections(image)

    # Show the original and the annotated image side by side
    # (matplotlib expects RGB, OpenCV delivers BGR).
    plt.figure(figsize=(10, 5))
    plt.subplot(1, 2, 1)
    plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    plt.title('原始图像')
    plt.axis('off')
    plt.subplot(1, 2, 2)
    plt.imshow(cv2.cvtColor(result, cv2.COLOR_BGR2RGB))
    plt.title(f'检测到 {face_count} 张人脸')
    plt.axis('off')
    plt.show()
使用TensorFlow Object Detection API
# 首先安装TensorFlow Object Detection API
"""
pip install tensorflow
pip install tensorflow-object-detection-api
"""
import numpy as np
import tensorflow as tf
import cv2
import matplotlib.pyplot as plt
class TFObjectDetector:
    """Object detector wrapping a TensorFlow SavedModel detection model."""

    def __init__(self, model_name='ssd_mobilenet_v2_coco_2018_03_29'):
        """Load the SavedModel found in ``<model_name>/saved_model``.

        Args:
            model_name: Directory that contains the ``saved_model`` folder.
        """
        self.model = tf.saved_model.load(f'{model_name}/saved_model')
        self.category_index = self._load_coco_categories()

    def _load_coco_categories(self):
        """Return the COCO label map as ``{id: {'id': id, 'name': name}}``.

        The ids follow the label map used by the TensorFlow detection model
        zoo; they run from 1 to 90 with gaps, giving 80 classes.
        """
        names = (
            (1, 'person'), (2, 'bicycle'), (3, 'car'), (4, 'motorcycle'),
            (5, 'airplane'), (6, 'bus'), (7, 'train'), (8, 'truck'),
            (9, 'boat'), (10, 'traffic light'), (11, 'fire hydrant'),
            (13, 'stop sign'), (14, 'parking meter'), (15, 'bench'),
            (16, 'bird'), (17, 'cat'), (18, 'dog'), (19, 'horse'),
            (20, 'sheep'), (21, 'cow'), (22, 'elephant'), (23, 'bear'),
            (24, 'zebra'), (25, 'giraffe'), (27, 'backpack'),
            (28, 'umbrella'), (31, 'handbag'), (32, 'tie'),
            (33, 'suitcase'), (34, 'frisbee'), (35, 'skis'),
            (36, 'snowboard'), (37, 'sports ball'), (38, 'kite'),
            (39, 'baseball bat'), (40, 'baseball glove'),
            (41, 'skateboard'), (42, 'surfboard'), (43, 'tennis racket'),
            (44, 'bottle'), (46, 'wine glass'), (47, 'cup'), (48, 'fork'),
            (49, 'knife'), (50, 'spoon'), (51, 'bowl'), (52, 'banana'),
            (53, 'apple'), (54, 'sandwich'), (55, 'orange'),
            (56, 'broccoli'), (57, 'carrot'), (58, 'hot dog'),
            (59, 'pizza'), (60, 'donut'), (61, 'cake'), (62, 'chair'),
            (63, 'couch'), (64, 'potted plant'), (65, 'bed'),
            (67, 'dining table'), (70, 'toilet'), (72, 'tv'),
            (73, 'laptop'), (74, 'mouse'), (75, 'remote'),
            (76, 'keyboard'), (77, 'cell phone'), (78, 'microwave'),
            (79, 'oven'), (80, 'toaster'), (81, 'sink'),
            (82, 'refrigerator'), (84, 'book'), (85, 'clock'),
            (86, 'vase'), (87, 'scissors'), (88, 'teddy bear'),
            (89, 'hair drier'), (90, 'toothbrush'),
        )
        return {
            class_id: {'id': class_id, 'name': name}
            for class_id, name in names
        }

    def detect(self, image):
        """Run the model on one image.

        Args:
            image: Image array; a batch axis is added in front. Detection
                models from the TensorFlow zoo are normally fed RGB data, so
                callers holding an OpenCV BGR frame should convert first
                (assumption -- confirm for the exported model in use).

        Returns:
            ``(detection_boxes, detection_scores, detection_classes)`` as
            NumPy arrays limited to the number of detections reported by the
            model. No score filtering is applied here.
        """
        input_tensor = tf.convert_to_tensor(image)[tf.newaxis, ...]

        # Objects loaded from TF1-exported SavedModels are not callable; use
        # the serving signature in that case. Callable models are invoked
        # directly.
        model = self.model
        infer = model if callable(model) else model.signatures['serving_default']
        detections = dict(infer(input_tensor))

        # num_detections carries a batch axis; take the first entry explicitly
        # instead of relying on int() of a one-element tensor.
        num_detections = int(
            np.asarray(detections.pop('num_detections')).reshape(-1)[0]
        )
        detections = {
            key: value[0, :num_detections].numpy()
            for key, value in detections.items()
        }

        detection_classes = detections['detection_classes'].astype(np.int64)
        detection_scores = detections['detection_scores']
        detection_boxes = detections['detection_boxes']
        return detection_boxes, detection_scores, detection_classes

    def draw_detections(self, image, boxes, scores, classes,
                        min_score_thresh=0.5):
        """Return a copy of ``image`` with boxes and labels drawn.

        Args:
            image: Image to annotate (not modified).
            boxes: Boxes as ``[ymin, xmin, ymax, xmax]`` relative to the
                image size.
            scores: Score per box.
            classes: Class id per box.
            min_score_thresh: Boxes scoring below this value are skipped.
        """
        result = image.copy()
        height, width = image.shape[:2]
        for box, score, class_id in zip(boxes, scores, classes):
            if score < min_score_thresh:
                continue
            ymin, xmin, ymax, xmax = box
            left = int(xmin * width)
            right = int(xmax * width)
            top = int(ymin * height)
            bottom = int(ymax * height)

            class_id = int(class_id)
            class_name = self.category_index.get(class_id, {}).get('name', 'Unknown')

            # Seed a private generator with the class id: each class keeps
            # the same colour across frames and the global RNG is untouched.
            rng = np.random.RandomState(class_id % (2 ** 32))
            color = tuple(rng.randint(0, 256, 3).tolist())

            cv2.rectangle(result, (left, top), (right, bottom), color, 2)
            label = f"{class_name}: {score:.2f}"
            # Keep the label visible when the box touches the top edge.
            cv2.putText(result, label, (left, max(top - 10, 15)),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
        return result
# Video processing
def process_video(video_path, output_path=None):
    """Run object detection on every frame of a video.

    Args:
        video_path: Source passed to ``cv2.VideoCapture``.
        output_path: When given, annotated frames are written to this file;
            otherwise they are shown in a window until 'q' is pressed.
    """
    detector = TFObjectDetector()
    cap = cv2.VideoCapture(video_path)
    out = None
    try:
        if output_path:
            fourcc = cv2.VideoWriter_fourcc(*'mp4v')
            # Reuse the source frame rate so the output plays at the original
            # speed; fall back to 20 fps when the container reports none.
            fps = cap.get(cv2.CAP_PROP_FPS) or 20.0
            frame_size = (
                int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
                int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)),
            )
            out = cv2.VideoWriter(output_path, fourcc, fps, frame_size)

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            # OpenCV decodes frames as BGR; the detector is given RGB, while
            # drawing stays on the BGR frame so it can be written or shown
            # unchanged.
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            boxes, scores, classes = detector.detect(rgb_frame)
            result = detector.draw_detections(frame, boxes, scores, classes)
            if out is not None:
                out.write(result)
            else:
                cv2.imshow('Object Detection', result)
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
    finally:
        # Release resources even when detection fails part-way through.
        cap.release()
        if out is not None:
            out.release()
        cv2.destroyAllWindows()
综合示例:图像和视频目标检测
import cv2
import numpy as np
from pathlib import Path
class ObjectDetector:
    """Image and video detector that runs either YOLO or a Haar face cascade."""

    def __init__(self, method='yolo'):
        """Create a detector.

        Args:
            method: ``'yolo'`` or ``'haar'``.

        Raises:
            ValueError: If ``method`` is not supported.
        """
        self.method = method
        self.setup_detector()

    def setup_detector(self):
        """Load the model that belongs to ``self.method``."""
        if self.method == 'yolo':
            self.net = cv2.dnn.readNet(
                'yolo_files/yolov3.weights',
                'yolo_files/yolov3.cfg'
            )
            with open('yolo_files/coco.names', 'r', encoding='utf-8') as f:
                self.classes = [line.strip() for line in f]
            self.layer_names = self.net.getLayerNames()
            # Depending on the OpenCV release this is a flat or an (N, 1)
            # array of 1-based layer ids; flattening handles both.
            out_ids = np.asarray(self.net.getUnconnectedOutLayers()).flatten()
            self.output_layers = [self.layer_names[int(i) - 1] for i in out_ids]
            # Chosen once so a class keeps its colour from frame to frame.
            self.colors = np.random.uniform(0, 255, size=(len(self.classes), 3))
        elif self.method == 'haar':
            self.face_cascade = cv2.CascadeClassifier(
                cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
            )
        else:
            # Fail early: an unknown method used to surface only later as an
            # unbound variable while processing a video.
            raise ValueError(f"Unsupported detection method: {self.method!r}")

    def _detect(self, image):
        """Annotate one BGR frame with the configured method."""
        if self.method == 'yolo':
            return self._detect_yolo(image)
        if self.method == 'haar':
            return self._detect_haar(image)
        raise ValueError(f"Unsupported detection method: {self.method!r}")

    def detect_image(self, image_path, output_path=None):
        """Detect objects in a single image file.

        Args:
            image_path: Image to read.
            output_path: When given, the annotated image is written there.

        Returns:
            The annotated image.

        Raises:
            FileNotFoundError: If the image cannot be read.
        """
        image = cv2.imread(str(image_path))
        if image is None:
            # cv2.imread reports failure by returning None.
            raise FileNotFoundError(f"Unable to read image: {image_path}")
        result = self._detect(image)
        if output_path:
            cv2.imwrite(str(output_path), result)
        return result

    def detect_video(self, video_path, output_path=None):
        """Detect objects in every frame of a video.

        Args:
            video_path: Video to read.
            output_path: When given, annotated frames are written to this
                file; otherwise they are displayed until 'q' is pressed.
        """
        cap = cv2.VideoCapture(str(video_path))
        out = None
        try:
            if output_path:
                fourcc = cv2.VideoWriter_fourcc(*'mp4v')
                # Keep fractional rates (e.g. 29.97) and fall back to 20 fps
                # when the source reports none.
                fps = cap.get(cv2.CAP_PROP_FPS) or 20.0
                width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                out = cv2.VideoWriter(str(output_path), fourcc, fps, (width, height))

            frame_count = 0
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    break
                frame_count += 1
                print(f"处理帧 {frame_count}")
                result = self._detect(frame)
                if out is not None:
                    out.write(result)
                else:
                    cv2.imshow('Object Detection', result)
                    if cv2.waitKey(1) & 0xFF == ord('q'):
                        break
        finally:
            # Release resources even if detection raises mid-stream.
            cap.release()
            if out is not None:
                out.release()
            cv2.destroyAllWindows()

    @staticmethod
    def _decode_outputs(outputs, width, height, confidence_threshold):
        """Convert raw YOLO outputs into pixel boxes, scores and class ids.

        Each output is read as a 2-D array whose rows hold a relative box in
        columns 0-3 (centre x, centre y, width, height) and per-class scores
        from column 5 onwards; column 4 is not used here.

        Returns:
            ``(boxes, confidences, class_ids)`` with boxes as
            ``[x, y, w, h]`` in pixels, ``(x, y)`` being the top-left corner.
        """
        boxes = []
        confidences = []
        class_ids = []
        for output in outputs:
            output = np.asarray(output)
            if output.size == 0:
                continue
            scores = output[:, 5:]
            best_ids = scores.argmax(axis=1)
            best_scores = scores[np.arange(scores.shape[0]), best_ids]
            for row in np.flatnonzero(best_scores > confidence_threshold):
                rel_cx, rel_cy, rel_w, rel_h = (float(v) for v in output[row, :4])
                center_x = int(rel_cx * width)
                center_y = int(rel_cy * height)
                w = int(rel_w * width)
                h = int(rel_h * height)
                boxes.append([int(center_x - w / 2), int(center_y - h / 2), w, h])
                confidences.append(float(best_scores[row]))
                class_ids.append(int(best_ids[row]))
        return boxes, confidences, class_ids

    def _detect_yolo(self, image, confidence_threshold=0.5, nms_threshold=0.4):
        """Return a copy of ``image`` annotated with YOLO detections.

        Args:
            image: BGR image (the blob is built with ``swapRB=True``).
            confidence_threshold: Minimum class score for a box to be kept.
            nms_threshold: Overlap threshold for non-maximum suppression.
        """
        height, width = image.shape[:2]
        blob = cv2.dnn.blobFromImage(
            image, 0.00392, (416, 416), (0, 0, 0), True, crop=False
        )
        self.net.setInput(blob)
        outputs = self.net.forward(self.output_layers)

        boxes, confidences, class_ids = self._decode_outputs(
            outputs, width, height, confidence_threshold
        )
        indexes = cv2.dnn.NMSBoxes(
            boxes, confidences, confidence_threshold, nms_threshold
        )

        result = image.copy()
        # NMSBoxes may return an empty tuple or an array of varying shape.
        for i in np.asarray(indexes, dtype=int).flatten():
            x, y, w, h = boxes[i]
            class_id = class_ids[i]
            label = self.classes[class_id]
            color = tuple(float(c) for c in self.colors[class_id])
            cv2.rectangle(result, (x, y), (x + w, y + h), color, 2)
            cv2.putText(
                result, f"{label}: {confidences[i]:.2f}",
                (x, max(y - 5, 15)), cv2.FONT_HERSHEY_SIMPLEX,
                0.5, color, 2
            )
        return result

    def _detect_haar(self, image):
        """Return a copy of ``image`` with detected faces outlined."""
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        faces = self.face_cascade.detectMultiScale(
            gray, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30)
        )
        result = image.copy()
        for face in faces:
            x, y, w, h = (int(v) for v in face)
            cv2.rectangle(result, (x, y), (x + w, y + h), (255, 0, 0), 2)
            cv2.putText(
                result, "Face", (x, max(y - 5, 15)),
                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 2
            )
        return result
# Usage example
if __name__ == "__main__":
    # YOLO: annotate one picture, then a whole video, writing both to disk.
    yolo_detector = ObjectDetector(method='yolo')
    result = yolo_detector.detect_image(
        image_path='input_image.jpg',
        output_path='output_image.jpg',
    )
    yolo_detector.detect_video(
        video_path='input_video.mp4',
        output_path='output_video.mp4',
    )

    # Haar cascade: face detection on a group photo, nothing written to disk.
    haar_detector = ObjectDetector(method='haar')
    result = haar_detector.detect_image(image_path='group_photo.jpg')
安装依赖和准备模型
# 安装基础依赖
pip install opencv-python numpy matplotlib tensorflow
# 下载YOLO模型文件
# 访问 https://pjreddie.com/media/files/yolov3.weights 下载
# 创建模型目录和下载配置文件
mkdir -p yolo_files
wget https://raw.githubusercontent.com/pjreddie/darknet/master/cfg/yolov3.cfg -O yolo_files/yolov3.cfg
wget https://raw.githubusercontent.com/pjreddie/darknet/master/data/coco.names -O yolo_files/coco.names
关键概念说明
- YOLO (You Only Look Once):实时目标检测算法,速度快
- Haar Cascade:基于特征的级联分类器,适合人脸检测
- NMS (Non-Maximum Suppression):非极大值抑制,去除重复检测框
- IoU (Intersection over Union):交并比,评估检测框重叠程度
- Confidence Score:置信度分数,表示检测的可靠程度
这些案例涵盖了从简单到复杂的目标检测实现,你可以根据实际需求选择合适的方法。