本文目录导读:

在Python中重启异常进程有多种方法,我来介绍几种常见且实用的方案:
使用子进程管理(推荐)
基础版本
import subprocess
import time
import sys
def run_script(script_path='your_script.py'):
    """Run a Python script in a child process and report whether it succeeded.

    Args:
        script_path: Path of the script to launch with the current
            interpreter (``sys.executable``). Defaults to
            ``'your_script.py'``, the value that used to be hard-coded.

    Returns:
        True if the child exited with return code 0. False if it exited
        with a non-zero code, or if launching/waiting raised an exception
        (the exception is printed).
    """
    try:
        # Launch the target script with the same interpreter as this monitor.
        process = subprocess.Popen([sys.executable, script_path])
        process.wait()
    except Exception as e:
        print(f"进程异常退出: {e}")
        return False
    return process.returncode == 0
def monitor_and_restart(max_restarts=5, base_delay=2):
    """Run the script and restart it with exponential backoff on failure.

    Calls ``run_script()`` repeatedly until it reports success or until
    ``max_restarts`` failed runs have accumulated.

    Args:
        max_restarts: Number of failed runs tolerated before giving up.
        base_delay: Wait in seconds after the first failure; the wait
            doubles after every further failure (2, 4, 8, ... by default).
    """
    restart_count = 0
    while restart_count < max_restarts:
        print(f"启动进程 (第{restart_count + 1}次)")
        if run_script():
            print("进程正常退出")
            return
        restart_count += 1
        if restart_count >= max_restarts:
            # No point in sleeping when no further attempt will be made.
            break
        # Exponential backoff; the printed value is the delay actually used.
        delay = base_delay * 2 ** (restart_count - 1)
        print(f"进程异常,{delay}秒后重启...")
        time.sleep(delay)
    print("达到最大重启次数,停止监控")


if __name__ == "__main__":
    monitor_and_restart()
增强版本(带日志和邮件通知)
import subprocess
import time
import sys
import logging
import smtplib
from datetime import datetime
from pathlib import Path
class ProcessMonitor:
    """Run a Python script, log its failures and restart it with backoff.

    Args:
        script_path: Script launched with the current interpreter.
        max_restarts: Maximum number of launches before giving up.
        restart_delay: Base delay in seconds; the wait before the next
            launch is ``restart_delay * <number of launches so far>``.
        timeout: Seconds a single run may take before it is killed.
            Defaults to 3600, the value that used to be hard-coded.
    """

    def __init__(self, script_path, max_restarts=5, restart_delay=5,
                 timeout=3600):
        self.script_path = script_path
        self.max_restarts = max_restarts
        self.restart_delay = restart_delay
        self.timeout = timeout
        self.restart_count = 0
        self.setup_logging()

    def setup_logging(self):
        """Configure root logging to a dated log file and to the console."""
        log_file = Path(f"process_monitor_{datetime.now().strftime('%Y%m%d')}.log")
        logging.basicConfig(
            level=logging.INFO,
            format='%(asctime)s - %(levelname)s - %(message)s',
            handlers=[
                # Explicit UTF-8: the log messages are not ASCII, and the
                # platform default encoding may not be able to represent them.
                logging.FileHandler(log_file, encoding='utf-8'),
                logging.StreamHandler(),
            ],
        )
        self.logger = logging.getLogger(__name__)

    def send_alert(self, message):
        """Send an alert (optional hook).

        No external channel is wired in; the message is written to the log
        at CRITICAL level so that it is not silently dropped.
        """
        try:
            # Integrate e-mail, DingTalk, WeCom or similar notifications here.
            self.logger.critical(message)
        except Exception as e:
            self.logger.error(f"发送告警失败: {e}")

    def run_process(self):
        """Run the target script once and wait for it to finish.

        Returns:
            True if the child exited with return code 0; False if it
            exited with another code, exceeded ``self.timeout`` or could
            not be run.
        """
        process = None
        try:
            process = subprocess.Popen(
                [sys.executable, self.script_path],
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
            )
            # Blocks until the child exits or the timeout expires.
            _stdout, stderr = process.communicate(timeout=self.timeout)
        except subprocess.TimeoutExpired:
            self.logger.warning("进程超时,强制终止")
            process.kill()
            # Reap the killed child and close its pipes; without this it
            # would linger as a zombie.
            process.communicate()
            return False
        except Exception as e:
            self.logger.error(f"运行进程异常: {e}")
            if process is not None and process.poll() is None:
                # Do not leave a half-started child behind.
                process.kill()
                process.wait()
            return False

        if process.returncode != 0:
            self.logger.error(f"进程退出码: {process.returncode}")
            if stderr:
                # errors="replace" keeps the report even for non-UTF-8 output.
                self.logger.error(f"错误输出: {stderr.decode(errors='replace')}")
            return False
        return True

    def start_monitoring(self):
        """Launch the script until it succeeds or max_restarts is reached."""
        self.logger.info(f"开始监控进程: {self.script_path}")
        while self.restart_count < self.max_restarts:
            self.restart_count += 1
            self.logger.info(f"第{self.restart_count}次启动进程")
            if self.run_process():
                self.logger.info("进程正常退出")
                break
            if self.restart_count < self.max_restarts:
                # Linear backoff: the wait grows with every failed launch.
                delay = self.restart_delay * self.restart_count
                self.logger.info(f"{delay}秒后重启...")
                time.sleep(delay)
            else:
                self.logger.error("达到最大重启次数")
                self.send_alert(f"进程 {self.script_path} 重启失败")
        self.logger.info("监控结束")
# Usage example. Guarded so that importing this module does not start
# monitoring as a side effect.
if __name__ == "__main__":
    monitor = ProcessMonitor('your_script.py', max_restarts=3, restart_delay=5)
    monitor.start_monitoring()
使用supervisor管理(生产环境推荐)
supervisor配置示例
[program:my_python_app]
command=python /path/to/your_script.py
directory=/path/to/app
autostart=true
autorestart=true
startretries=3
stopasgroup=true
killasgroup=true
user=www-data
stdout_logfile=/var/log/myapp.log
stderr_logfile=/var/log/myapp_error.log
使用进程管理器类
import os
import signal
import subprocess
import sys
import time

import psutil
class ProcessManager:
    """Start, stop and keep alive a Python script in its own process group.

    POSIX only: stopping relies on ``os.killpg``.

    Args:
        process_name: Label used in the printed status messages.
        script_path: Script launched with the current interpreter.
    """

    def __init__(self, process_name, script_path):
        self.process_name = process_name
        self.script_path = script_path
        self.process = None

    def start(self):
        """Launch the script in a new session and return its PID."""
        self.process = subprocess.Popen(
            [sys.executable, self.script_path],
            # Gives the child its own session and process group, like the
            # former preexec_fn=os.setsid, without the preexec_fn caveats.
            start_new_session=True,
        )
        return self.process.pid

    def stop(self, timeout=5):
        """Terminate the child's process group and reap the child.

        Sends SIGTERM, waits up to ``timeout`` seconds, then escalates to
        SIGKILL. Does nothing if no child was started or it already exited.
        """
        proc = self.process
        if proc is None or proc.poll() is not None:
            return
        self._signal_group(signal.SIGTERM)
        try:
            # Waiting reaps the child so it does not remain as a zombie.
            proc.wait(timeout=timeout)
        except subprocess.TimeoutExpired:
            self._signal_group(signal.SIGKILL)
            proc.wait()

    def _signal_group(self, sig):
        """Send ``sig`` to the child's process group, ignoring a vanished group."""
        try:
            os.killpg(os.getpgid(self.process.pid), sig)
        except ProcessLookupError:
            # The group disappeared between the liveness check and the signal.
            pass

    def is_running(self):
        """Return True if a child was started and has not exited yet."""
        return self.process is not None and self.process.poll() is None

    def restart(self):
        """Stop the child, pause briefly, start it again and return the new PID."""
        self.stop()
        time.sleep(2)
        return self.start()

    def monitor(self, check_interval=10, max_restarts=None):
        """Poll the child and (re)launch it whenever it is not running.

        Args:
            check_interval: Seconds between liveness checks.
            max_restarts: Maximum number of launches performed by this
                call, the first one included. ``None`` (default) keeps
                the original behaviour of restarting forever.
        """
        launches = 0
        while True:
            if not self.is_running():
                if max_restarts is not None and launches >= max_restarts:
                    print("达到最大重启次数,停止监控")
                    return
                print(f"进程 {self.process_name} 已停止,准备重启")
                pid = self.start()
                print(f"进程已重启,新PID: {pid}")
                launches += 1
            time.sleep(check_interval)
# Usage example. Guarded so that importing this module does not enter the
# endless monitor loop as a side effect.
if __name__ == "__main__":
    manager = ProcessManager("myapp", "your_script.py")
    manager.monitor()
使用Python-daemon实现守护进程
import daemon
import lockfile
import time
import sys
def run_forever():
    """Main program to be supervised: repeat one work cycle forever.

    An exception raised by a cycle is printed and followed by a 3 second
    pause before the next cycle; the loop itself never returns.
    """
    def _one_cycle():
        # Put the main business logic here.
        print("程序运行中...")
        time.sleep(5)

    while True:
        try:
            _one_cycle()
        except Exception as err:
            print(f"异常: {err}")
            time.sleep(3)
def create_daemon():
    """Run ``run_forever()`` inside a ``daemon.DaemonContext``.

    The context is configured with a working directory, a umask and a
    ``lockfile.FileLock`` as its pidfile.
    """
    daemon_options = {
        'working_directory': '/path/to/app',
        'umask': 0o002,
        'pidfile': lockfile.FileLock('/var/run/myapp.pid'),
    }
    with daemon.DaemonContext(**daemon_options):
        run_forever()


if __name__ == "__main__":
    create_daemon()
Docker方式(现代推荐)
Dockerfile
FROM python:3.9
WORKDIR /app
COPY . /app
RUN pip install -r requirements.txt
# 使用supervisor管理进程
RUN apt-get update && apt-get install -y supervisor
COPY supervisord.conf /etc/supervisor/conf.d/supervisord.conf
CMD ["supervisord", "-c", "/etc/supervisor/conf.d/supervisord.conf"]
最佳实践建议
- 谨慎设置重启次数:避免无限重启导致资源耗尽
- 实现指数退避:重启间隔逐渐增加
- 记录详细日志:便于故障排查
- 设置进程超时:防止进程卡死后无限期挂起;超时强制终止后要调用 wait()/communicate() 回收子进程,避免留下僵尸进程
- 监控资源使用:内存泄漏、CPU过高等
- 使用信号处理:优雅关闭进程
- 考虑使用专门的进程管理工具:如supervisor、systemd等
生产环境推荐方案
- 小型项目:使用 subprocess + 简单监控脚本
- 中型项目:使用 supervisor 或 systemd
- 大型项目:使用 Kubernetes 或 Docker Swarm 编排
选择哪种方案取决于你的具体需求和环境复杂度。