Python案例如何实现目标检测？

wen python案例 2026-06-16 1

本文目录导读：

Python案例如何实现目标检测？

使用OpenCV的预训练模型（DNN模块）
使用OpenCV自带的Haar Cascade分类器
使用TensorFlow Object Detection API
综合示例：图像和视频目标检测
安装依赖和准备模型
关键概念说明

我来为你详细介绍Python实现目标检测的几种主流方法，并提供完整的案例代码。

使用OpenCV的预训练模型（DNN模块）

1 使用YOLOv3进行目标检测

import cv2
import numpy as np
import matplotlib.pyplot as plt
class YOLODetector:
    """YOLO object detector built on OpenCV's DNN module.

    Loads a Darknet config/weights pair plus a class-name file, runs a forward
    pass on an image, and can draw the detections that survive non-maximum
    suppression.
    """

    def __init__(self, config_path, weights_path, classes_path):
        """Load the class names and the network.

        Args:
            config_path: Path to the Darknet ``.cfg`` file.
            weights_path: Path to the matching ``.weights`` file.
            classes_path: Text file containing one class name per line.
        """
        # Explicit encoding so the class list is decoded identically on every platform.
        with open(classes_path, 'r', encoding='utf-8') as f:
            self.classes = [line.strip() for line in f]
        self.net = cv2.dnn.readNet(weights_path, config_path)
        self.layer_names = self.net.getLayerNames()
        # Ask OpenCV for the output layer names directly. Indexing layer_names
        # with getUnconnectedOutLayers() depends on whether that call returns
        # a flat or an Nx1 array, which is not the same in every OpenCV release.
        self.output_layers = list(self.net.getUnconnectedOutLayersNames())
        # One random colour per class, fixed for the lifetime of the detector.
        self.colors = np.random.uniform(0, 255, size=(len(self.classes), 3))

    def detect(self, image, confidence_threshold=0.5, nms_threshold=0.4):
        """Run the network on ``image`` and return the raw detections.

        Args:
            image: Image array. The blob is built with ``swapRB=True``, so the
                channels should be in OpenCV's native BGR order.
            confidence_threshold: Minimum class score for a detection to be kept.
            nms_threshold: Overlap threshold passed to non-maximum suppression.

        Returns:
            A tuple ``(boxes, confidences, class_ids, indexes)`` where ``boxes``
            holds ``[x, y, w, h]`` pixel boxes, ``confidences`` and
            ``class_ids`` are parallel lists, and ``indexes`` is the result of
            ``cv2.dnn.NMSBoxes`` (the entries of the lists that were kept).
        """
        height, width = image.shape[:2]
        # 0.00392 is roughly 1/255: scale pixel values into [0, 1].
        blob = cv2.dnn.blobFromImage(image, 0.00392, (416, 416), (0, 0, 0), True, crop=False)
        self.net.setInput(blob)
        outputs = self.net.forward(self.output_layers)

        boxes = []
        confidences = []
        class_ids = []
        for output in outputs:
            for detection in output:
                # Entries 0-3 are the box, entries from 5 on are per-class scores.
                scores = detection[5:]
                class_id = int(np.argmax(scores))
                confidence = float(scores[class_id])
                if confidence <= confidence_threshold:
                    continue
                # Box centre and size are relative to the image; convert to pixels.
                center_x = int(detection[0] * width)
                center_y = int(detection[1] * height)
                w = int(detection[2] * width)
                h = int(detection[3] * height)
                # Top-left corner of the box.
                x = int(center_x - w / 2)
                y = int(center_y - h / 2)
                boxes.append([x, y, w, h])
                confidences.append(confidence)
                class_ids.append(class_id)

        # Non-maximum suppression removes overlapping duplicates.
        indexes = cv2.dnn.NMSBoxes(boxes, confidences, confidence_threshold, nms_threshold)
        return boxes, confidences, class_ids, indexes

    def draw_detections(self, image, boxes, confidences, class_ids, indexes):
        """Return a copy of ``image`` with the kept detections drawn on it.

        Args:
            image: Image to annotate; it is not modified.
            boxes: ``[x, y, w, h]`` boxes as returned by ``detect``.
            confidences: Confidence per box.
            class_ids: Class index per box.
            indexes: Indices of the boxes to draw (the NMS result).

        Returns:
            The annotated copy of ``image``.
        """
        result = image.copy()
        if len(indexes) == 0:
            return result
        font = cv2.FONT_HERSHEY_PLAIN
        # flatten() copes with both flat and Nx1 index arrays.
        for i in np.asarray(indexes).flatten():
            x, y, w, h = boxes[i]
            class_id = class_ids[i]
            label = str(self.classes[class_id])
            # Hand OpenCV a plain tuple of floats rather than a numpy row.
            color = tuple(float(c) for c in self.colors[class_id])
            cv2.rectangle(result, (x, y), (x + w, y + h), color, 2)
            text = f"{label}: {confidences[i]:.2f}"
            # Keep the label inside the image when the box touches the top edge.
            text_y = max(y - 5, 12)
            cv2.putText(result, text, (x, text_y), font, 1, color, 2)
        return result
# Usage example
def detect_objects(image_path, config_path, weights_path, classes_path):
    """Detect objects in an image file and show original and result side by side.

    Args:
        image_path: Path of the image to analyse.
        config_path: Path to the YOLO ``.cfg`` file.
        weights_path: Path to the YOLO ``.weights`` file.
        classes_path: Path to the class-name file.

    Returns:
        The annotated image in RGB channel order.

    Raises:
        FileNotFoundError: If the image cannot be read.
    """
    bgr_image = cv2.imread(image_path)
    if bgr_image is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError(f"Could not read image: {image_path}")

    detector = YOLODetector(config_path, weights_path, classes_path)
    # Detect on the BGR image: the detector builds its blob with swapRB=True,
    # so converting to RGB first would feed the network swapped channels.
    boxes, confidences, class_ids, indexes = detector.detect(bgr_image)

    # matplotlib expects RGB, so convert only for drawing and display.
    image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)
    result = detector.draw_detections(image, boxes, confidences, class_ids, indexes)

    plt.figure(figsize=(12, 6))
    plt.subplot(1, 2, 1)
    plt.imshow(image)
    plt.title('原始图像')
    plt.axis('off')
    plt.subplot(1, 2, 2)
    plt.imshow(result)
    plt.title(f'检测到 {len(indexes)} 个物体')
    plt.axis('off')
    plt.show()
    return result
# Example helper that fetches the YOLO model files
def download_yolo_files():
    """Download the YOLOv3 config and class list into ``yolo_files/``.

    The weights file is not downloaded; the function prints where to get it
    manually.
    """
    import urllib.request
    import os

    # Make sure the target directory exists.
    os.makedirs('yolo_files', exist_ok=True)

    # (source URL, local destination) pairs, fetched in order: config, class names.
    downloads = (
        ("https://raw.githubusercontent.com/pjreddie/darknet/master/cfg/yolov3.cfg",
         "yolo_files/yolov3.cfg"),
        ("https://raw.githubusercontent.com/pjreddie/darknet/master/data/coco.names",
         "yolo_files/coco.names"),
    )
    for url, destination in downloads:
        urllib.request.urlretrieve(url, destination)

    for message in (
        "请手动下载yolov3.weights文件",
        "下载地址: https://pjreddie.com/media/files/yolov3.weights",
    ):
        print(message)
# Script entry point
if __name__ == "__main__":
    # Run detection on the sample picture with the downloaded YOLOv3 files.
    result = detect_objects(
        image_path='test_image.jpg',
        config_path='yolo_files/yolov3.cfg',
        weights_path='yolo_files/yolov3.weights',
        classes_path='yolo_files/coco.names',
    )

使用OpenCV自带的Haar Cascade分类器

import cv2
import matplotlib.pyplot as plt
class HaarCascadeDetector:
    """Face, eye and smile detector using the Haar cascades bundled with OpenCV."""

    def __init__(self):
        """Load the pre-trained face, eye and smile cascades.

        Raises:
            RuntimeError: If a cascade file cannot be loaded.
        """
        self.face_cascade = self._load_cascade('haarcascade_frontalface_default.xml')
        self.eye_cascade = self._load_cascade('haarcascade_eye.xml')
        self.smile_cascade = self._load_cascade('haarcascade_smile.xml')

    @staticmethod
    def _load_cascade(filename):
        """Load one bundled cascade, failing loudly if the file is unusable."""
        cascade = cv2.CascadeClassifier(cv2.data.haarcascades + filename)
        if cascade.empty():
            raise RuntimeError(f"Could not load Haar cascade: {filename}")
        return cascade

    def detect_faces(self, image):
        """Return the face rectangles ``(x, y, w, h)`` found in a BGR image."""
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        faces = self.face_cascade.detectMultiScale(
            gray,
            scaleFactor=1.1,
            minNeighbors=5,
            minSize=(30, 30)
        )
        return faces

    def detect_eyes_and_smiles(self, image, face_roi):
        """Detect eyes and smiles inside one face rectangle.

        Args:
            image: BGR image containing the face.
            face_roi: Face rectangle as ``(x, y, w, h)``.

        Returns:
            ``(eyes, smiles, roi_color)``. The rectangles are relative to the
            face region, and ``roi_color`` is the slice of ``image`` covering
            that region (a view, not a copy).
        """
        x, y, w, h = face_roi
        roi_color = image[y:y + h, x:x + w]
        # Convert only the face region instead of the whole frame for every face.
        roi_gray = cv2.cvtColor(roi_color, cv2.COLOR_BGR2GRAY)
        eyes = self.eye_cascade.detectMultiScale(roi_gray)
        smiles = self.smile_cascade.detectMultiScale(
            roi_gray,
            scaleFactor=1.8,
            minNeighbors=20
        )
        return eyes, smiles, roi_color

    def draw_detections(self, image):
        """Return an annotated copy of ``image`` and the number of faces found.

        Faces are outlined in blue, eyes in green and smiles in red (BGR
        colours). The input image is left untouched.
        """
        result = image.copy()
        faces = self.detect_faces(image)
        for (x, y, w, h) in faces:
            cv2.rectangle(result, (x, y), (x + w, y + h), (255, 0, 0), 2)
            eyes, smiles, _ = self.detect_eyes_and_smiles(image, (x, y, w, h))
            # Draw on the matching region of the copy. Drawing on the region
            # of the input image would change the caller's image and leave the
            # returned result without eye and smile boxes.
            roi_result = result[y:y + h, x:x + w]
            for (ex, ey, ew, eh) in eyes:
                cv2.rectangle(roi_result, (ex, ey), (ex + ew, ey + eh), (0, 255, 0), 2)
            for (sx, sy, sw, sh) in smiles:
                cv2.rectangle(roi_result, (sx, sy), (sx + sw, sy + sh), (0, 0, 255), 2)
                # Label the smile.
                cv2.putText(roi_result, "Smile", (sx, sy - 5),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)
        return result, len(faces)
# Real-time video detection
def real_time_detection():
    """Show live face detection from the default camera until 'q' is pressed.

    The loop also stops when a frame cannot be read. The camera and all
    OpenCV windows are released even if an error occurs along the way.
    """
    cap = cv2.VideoCapture(0)
    try:
        detector = HaarCascadeDetector()
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            result, face_count = detector.draw_detections(frame)
            # Overlay the face count.
            cv2.putText(result, f"Faces: {face_count}", (10, 30),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
            cv2.imshow('Real-time Face Detection', result)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Always hand the camera back, including on Ctrl+C or a detector error.
        cap.release()
        cv2.destroyAllWindows()
# Usage example
if __name__ == "__main__":
    # Detect faces in a still image.
    image = cv2.imread('test_image.jpg')
    if image is None:
        # cv2.imread returns None for a missing or unreadable file; stop with
        # a clear message instead of failing later on image.copy().
        raise SystemExit("Could not read 'test_image.jpg'")
    detector = HaarCascadeDetector()
    result, face_count = detector.draw_detections(image)
    # Show the original and the annotated image side by side
    # (converted to RGB because matplotlib expects that order).
    plt.figure(figsize=(10, 5))
    plt.subplot(1, 2, 1)
    plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    plt.title('原始图像')
    plt.axis('off')
    plt.subplot(1, 2, 2)
    plt.imshow(cv2.cvtColor(result, cv2.COLOR_BGR2RGB))
    plt.title(f'检测到 {face_count} 张人脸')
    plt.axis('off')
    plt.show()

使用TensorFlow Object Detection API

# 首先安装TensorFlow Object Detection API
"""
pip install tensorflow
pip install tensorflow-object-detection-api
"""
import numpy as np
import tensorflow as tf
import cv2
import matplotlib.pyplot as plt
class TFObjectDetector:
    """Object detector wrapping a TensorFlow SavedModel detection model."""

    def __init__(self, model_name='ssd_mobilenet_v2_coco_2018_03_29'):
        """Load the SavedModel found at ``<model_name>/saved_model``.

        Args:
            model_name: Directory of the extracted pre-trained model.
        """
        self.model = tf.saved_model.load(f'{model_name}/saved_model')
        # Mapping from class id to {'id', 'name'}.
        self.category_index = self._load_coco_categories()

    def _load_coco_categories(self):
        """Return a simplified COCO category map (first ten classes only)."""
        categories = {
            1: {'id': 1, 'name': 'person'},
            2: {'id': 2, 'name': 'bicycle'},
            3: {'id': 3, 'name': 'car'},
            4: {'id': 4, 'name': 'motorcycle'},
            5: {'id': 5, 'name': 'airplane'},
            6: {'id': 6, 'name': 'bus'},
            7: {'id': 7, 'name': 'train'},
            8: {'id': 8, 'name': 'truck'},
            9: {'id': 9, 'name': 'boat'},
            10: {'id': 10, 'name': 'traffic light'},
            # ... more categories
        }
        return categories

    def detect(self, image):
        """Run the model on one image.

        Args:
            image: Image array of shape (height, width, channels); a batch
                dimension is added here.
                NOTE(review): detection models are normally fed RGB uint8
                images - confirm for the model in use.

        Returns:
            ``(boxes, scores, classes)`` for the detections reported by the
            model. No score filtering happens here; ``draw_detections``
            applies the threshold.
        """
        input_tensor = tf.convert_to_tensor(image)
        input_tensor = input_tensor[tf.newaxis, ...]
        # A SavedModel exported by TF1 loads as an object that is not callable;
        # inference then has to go through its serving signature.
        infer = self.model if callable(self.model) else self.model.signatures['serving_default']
        detections = infer(input_tensor)
        # num_detections comes back with a batch dimension; take its single entry.
        num_detections = int(np.asarray(detections['num_detections']).reshape(-1)[0])
        # Slice only the three outputs that are returned.
        detection_boxes = detections['detection_boxes'][0, :num_detections].numpy()
        detection_scores = detections['detection_scores'][0, :num_detections].numpy()
        detection_classes = (
            detections['detection_classes'][0, :num_detections].numpy().astype(np.int64)
        )
        return detection_boxes, detection_scores, detection_classes

    def draw_detections(self, image, boxes, scores, classes,
                        min_score_thresh=0.5):
        """Return a copy of ``image`` with sufficiently confident detections drawn.

        Args:
            image: Image to annotate; it is not modified.
            boxes: Boxes as ``[ymin, xmin, ymax, xmax]`` in relative coordinates.
            scores: Score per box.
            classes: Class id per box.
            min_score_thresh: Boxes scoring below this value are skipped.

        Returns:
            The annotated copy of ``image``.
        """
        result = image.copy()
        height, width = image.shape[:2]
        for box, score, raw_class in zip(boxes, scores, classes):
            if score < min_score_thresh:
                continue
            # Relative coordinates -> pixel coordinates.
            ymin, xmin, ymax, xmax = box
            xmin = int(xmin * width)
            xmax = int(xmax * width)
            ymin = int(ymin * height)
            ymax = int(ymax * height)
            class_id = int(raw_class)
            class_name = self.category_index.get(class_id, {}).get('name', 'Unknown')
            # Seed the colour with the class id so a class keeps one colour
            # across boxes and video frames instead of flickering.
            rng = np.random.default_rng(class_id)
            color = tuple(int(c) for c in rng.integers(0, 255, size=3))
            cv2.rectangle(result, (xmin, ymin), (xmax, ymax), color, 2)
            label = f"{class_name}: {score:.2f}"
            # Keep the label visible when the box touches the top edge.
            cv2.putText(result, label, (xmin, max(ymin - 10, 10)),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
        return result
# Video processing
def process_video(video_path, output_path=None):
    """Run object detection on every frame of a video.

    Args:
        video_path: Path of the input video.
        output_path: If given, annotated frames are written to this file;
            otherwise they are shown in a window until 'q' is pressed.
    """
    detector = TFObjectDetector()
    cap = cv2.VideoCapture(video_path)
    out = None
    try:
        if output_path:
            fourcc = cv2.VideoWriter_fourcc(*'mp4v')
            # Reuse the source frame rate so playback speed is preserved;
            # fall back to 20 fps when the container does not report one.
            fps = cap.get(cv2.CAP_PROP_FPS) or 20.0
            frame_size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
                          int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
            out = cv2.VideoWriter(output_path, fourcc, fps, frame_size)
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            # OpenCV decodes frames as BGR, while TensorFlow detection models
            # are fed RGB images, so convert before inference.
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            boxes, scores, classes = detector.detect(rgb_frame)
            # Draw on the BGR frame, which is what VideoWriter and imshow expect.
            result = detector.draw_detections(frame, boxes, scores, classes)
            if output_path:
                out.write(result)
            else:
                cv2.imshow('Object Detection', result)
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
    finally:
        # Release the capture, writer and windows even when a frame fails.
        cap.release()
        if out is not None:
            out.release()
        cv2.destroyAllWindows()

综合示例：图像和视频目标检测

import cv2
import numpy as np
from pathlib import Path
class ObjectDetector:
    """Image and video detector with a selectable backend.

    ``method='yolo'`` uses YOLOv3 through OpenCV's DNN module (model files are
    read from ``yolo_files/``); ``method='haar'`` uses OpenCV's frontal-face
    Haar cascade.
    """

    def __init__(self, method='yolo', confidence_threshold=0.5, nms_threshold=0.4):
        """Create the detector and load the chosen backend.

        Args:
            method: ``'yolo'`` or ``'haar'``.
            confidence_threshold: Minimum class score kept by the YOLO backend.
            nms_threshold: Overlap threshold for YOLO non-maximum suppression.

        Raises:
            ValueError: If ``method`` is not a supported backend.
        """
        self.method = method
        self.confidence_threshold = confidence_threshold
        self.nms_threshold = nms_threshold
        self.setup_detector()

    def setup_detector(self):
        """Load the model for ``self.method``."""
        if self.method == 'yolo':
            self.net = cv2.dnn.readNet(
                'yolo_files/yolov3.weights',
                'yolo_files/yolov3.cfg'
            )
            with open('yolo_files/coco.names', 'r', encoding='utf-8') as f:
                self.classes = [line.strip() for line in f]
            self.layer_names = self.net.getLayerNames()
            # Let OpenCV name the output layers; indexing with
            # getUnconnectedOutLayers() depends on the array shape it returns,
            # which is not the same in every OpenCV release.
            self.output_layers = list(self.net.getUnconnectedOutLayersNames())
            # Pick the per-class colours once so they stay stable across frames.
            self.colors = np.random.uniform(0, 255, size=(len(self.classes), 3))
        elif self.method == 'haar':
            self.face_cascade = cv2.CascadeClassifier(
                cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
            )
        else:
            # Fail at construction time instead of at the first detect call.
            raise ValueError(f"Unsupported detection method: {self.method!r}")

    def _detect(self, image):
        """Run the configured backend on one BGR image and return the annotated copy."""
        if self.method == 'yolo':
            return self._detect_yolo(image)
        if self.method == 'haar':
            return self._detect_haar(image)
        raise ValueError(f"Unsupported detection method: {self.method!r}")

    def detect_image(self, image_path, output_path=None):
        """Detect objects in a single image file.

        Args:
            image_path: Path of the image to read.
            output_path: If given, the annotated image is written there.

        Returns:
            The annotated image.

        Raises:
            FileNotFoundError: If the image cannot be read.
        """
        image = cv2.imread(str(image_path))
        if image is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError(f"Could not read image: {image_path}")
        result = self._detect(image)
        if output_path:
            cv2.imwrite(str(output_path), result)
        return result

    def detect_video(self, video_path, output_path=None):
        """Detect objects in every frame of a video.

        Args:
            video_path: Path of the input video.
            output_path: If given, annotated frames are written to this file;
                otherwise they are shown in a window until 'q' is pressed.
        """
        cap = cv2.VideoCapture(str(video_path))
        out = None
        try:
            if output_path:
                fourcc = cv2.VideoWriter_fourcc(*'mp4v')
                # Keep fractional rates such as 29.97, and fall back to 20 fps
                # when the source reports none (0 would give an unusable writer).
                fps = cap.get(cv2.CAP_PROP_FPS) or 20.0
                width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                out = cv2.VideoWriter(str(output_path), fourcc, fps, (width, height))
            frame_count = 0
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    break
                frame_count += 1
                print(f"处理帧 {frame_count}")
                result = self._detect(frame)
                if output_path:
                    out.write(result)
                else:
                    cv2.imshow('Object Detection', result)
                    if cv2.waitKey(1) & 0xFF == ord('q'):
                        break
        finally:
            # Release the capture, writer and windows even when a frame fails.
            cap.release()
            if out is not None:
                out.release()
            cv2.destroyAllWindows()

    def _detect_yolo(self, image):
        """Run YOLO on a BGR image and return a copy with the detections drawn."""
        height, width = image.shape[:2]
        # 0.00392 is roughly 1/255; swapRB=True converts BGR input to RGB.
        blob = cv2.dnn.blobFromImage(
            image, 0.00392, (416, 416), (0, 0, 0), True, crop=False
        )
        self.net.setInput(blob)
        outputs = self.net.forward(self.output_layers)

        boxes = []
        confidences = []
        class_ids = []
        for output in outputs:
            for detection in output:
                # Entries 0-3 are the box, entries from 5 on are per-class scores.
                scores = detection[5:]
                class_id = int(np.argmax(scores))
                confidence = float(scores[class_id])
                if confidence <= self.confidence_threshold:
                    continue
                center_x = int(detection[0] * width)
                center_y = int(detection[1] * height)
                w = int(detection[2] * width)
                h = int(detection[3] * height)
                x = int(center_x - w / 2)
                y = int(center_y - h / 2)
                boxes.append([x, y, w, h])
                confidences.append(confidence)
                class_ids.append(class_id)

        # Non-maximum suppression removes overlapping duplicates.
        indexes = cv2.dnn.NMSBoxes(
            boxes, confidences, self.confidence_threshold, self.nms_threshold
        )

        result = image.copy()
        if len(indexes) == 0:
            return result
        # flatten() copes with both flat and Nx1 index arrays.
        for i in np.asarray(indexes).flatten():
            x, y, w, h = boxes[i]
            label = self.classes[class_ids[i]]
            # Hand OpenCV a plain tuple of floats rather than a numpy row.
            color = tuple(float(c) for c in self.colors[class_ids[i]])
            cv2.rectangle(result, (x, y), (x + w, y + h), color, 2)
            cv2.putText(
                result, f"{label}: {confidences[i]:.2f}",
                (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX,
                0.5, color, 2
            )
        return result

    def _detect_haar(self, image):
        """Detect faces in a BGR image and return a copy with labelled boxes."""
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        faces = self.face_cascade.detectMultiScale(
            gray, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30)
        )
        result = image.copy()
        for (x, y, w, h) in faces:
            cv2.rectangle(result, (x, y), (x + w, y + h), (255, 0, 0), 2)
            cv2.putText(
                result, "Face", (x, y - 5),
                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 2
            )
        return result
# Usage example
if __name__ == "__main__":
    # YOLO backend: annotate one picture and one video, saving both results.
    yolo_detector = ObjectDetector(method='yolo')
    result = yolo_detector.detect_image(
        'input_image.jpg',
        output_path='output_image.jpg',
    )
    yolo_detector.detect_video(
        'input_video.mp4',
        output_path='output_video.mp4',
    )

    # Haar cascade backend: face detection on a group photo (nothing is saved).
    haar_detector = ObjectDetector(method='haar')
    result = haar_detector.detect_image('group_photo.jpg')

安装依赖和准备模型

# Install the base dependencies
pip install opencv-python numpy matplotlib tensorflow
# Download the YOLO weights file
# Get it from https://pjreddie.com/media/files/yolov3.weights
# Create the model directory and download the config and class-name files
mkdir -p yolo_files
wget https://raw.githubusercontent.com/pjreddie/darknet/master/cfg/yolov3.cfg -O yolo_files/yolov3.cfg
wget https://raw.githubusercontent.com/pjreddie/darknet/master/data/coco.names -O yolo_files/coco.names

关键概念说明

YOLO (You Only Look Once)：实时目标检测算法，速度快
Haar Cascade：基于特征的级联分类器，适合人脸检测
NMS (Non-Maximum Suppression)：非极大值抑制，去除重复检测框
IoU (Intersection over Union)：交并比，评估检测框重叠程度
Confidence Score：置信度分数，表示检测的可靠程度

这些案例涵盖了从简单到复杂的目标检测实现，你可以根据实际需求选择合适的方法。