1. 程序整体架构分析

核心设计模式:单例模式 + 命令模式

service_manager.py
├── 配置层 (Configuration Layer)
│   ├── 服务定义 (services字典)
│   ├── 环境配置 (conda环境路径)
│   └── 日志配置 (logging配置)
├── 管理层 (Management Layer)
│   ├── ServiceManager类
│   ├── 进程管理 (PID文件持久化)
│   └── 状态监控 (进程状态检查)
├── 操作层 (Operation Layer)
│   ├── 启动服务 (start_service)
│   ├── 停止服务 (stop_service)
│   ├── 重启服务 (restart_service)
│   └── 状态查看 (status)
└── 接口层 (Interface Layer)
    ├── 命令行参数解析
    ├── 信号处理
    └── 用户交互

2. 核心组件详解

2.1 配置层设计

# Service registry -- the central configuration of the program.
# Each key is a service name; each value describes how to run that service.
self.services = {
    'service_name': {
        'command': 'script.py',            # command to execute
        'port': 8000,                      # port number
        'log_file': './logs/service.log',  # log file path
        'description': '服务描述',          # human-readable description
        'env': 'conda_env_name',           # conda environment name
    },
}

设计原则:

  • 配置集中管理:所有服务配置集中在一个字典中,与启动/停止逻辑分离
  • 可扩展性:添加新服务只需在字典中添加配置
  • 可维护性:每个服务的配置项清晰明确

2.2 管理层设计

class ServiceManager:
    """Hold the service configuration and the PIDs of started services."""

    def __init__(self):
        """Set up configuration and in-memory state, then reload saved PIDs."""
        # 1. Configuration. ``{...}`` stands in for the service definitions,
        #    which are elided from this excerpt.
        self.services = {...}

        # 2. Process state, kept both in memory and on disk.
        self.processes = {}  # in-memory process state
        self.pid_file = 'service_pids.json'  # on-disk persistence

        # 3. Restore the state left behind by a previous run.
        self.load_pids()

核心思想:

  • 状态持久化:进程PID保存到文件,重启后仍能管理
  • 状态同步:内存状态与磁盘状态保持一致

2.3 操作层设计

def start_service(self, service_name: str, force: bool = False) -> bool:
    """Start one configured service and record its PID.

    Args:
        service_name: Key of the service in ``self.services``.
        force: Start the service even if it is already running; the running
            instance is stopped first.

    Returns:
        True if the service is running when the call returns, False if the
        service is unknown or the spawned process is not running.
    """
    # 1. Validate the argument.
    if service_name not in self.services:
        return False
    service = self.services[service_name]

    # 2. Check the current state.
    if self.is_service_running(service_name):
        if not force:
            return True
        # A forced start must not leave the old instance behind, otherwise
        # two processes would compete for the same port and PID entry.
        self.stop_service(service_name)

    # 3. Prepare the environment.
    env_path = self.get_conda_env_path(service['env'])
    log_path = self.prepare_log_directory(service['log_file'])

    # 4. Launch the process.
    process = self.spawn_process(service, env_path, log_path)

    # 5. Record the new state, in memory and on disk.
    if process.is_running():
        self.processes[service_name] = process.pid
        self.save_pids()
        return True

    return False

操作模式:

  • 验证 → 检查 → 准备 → 执行 → 更新
  • 每个步骤都有明确的职责和错误处理

3. 如何独立写出这种程序

3.1 第一步:明确需求

# Requirements analysis: what the tool does, its qualities, and its interfaces.
需求 = {
    "功能": [
        "启动服务",
        "停止服务",
        "重启服务",
        "查看状态",
    ],
    "特性": [
        "批量操作",
        "状态持久化",
        "日志管理",
        "错误处理",
    ],
    "接口": [
        "命令行",
        "配置文件",
        "日志输出",
    ],
}

3.2 第二步:设计数据结构

# Shape of one service configuration entry: field name -> expected type.
ServiceConfig = dict(
    name=str,         # service name
    command=str,      # start command
    port=int,         # port number
    log_file=str,     # log file
    env=str,          # runtime environment
    description=str,  # service description
)

# Shape of one tracked process record: field name -> expected type.
ProcessState = dict(
    pid=int,         # process ID
    start_time=str,  # start time
    status=str,      # run status
)

3.3 第三步:实现核心类

class ServiceManager:
    """Skeleton of the manager: loaded configuration plus process state."""

    def __init__(self, config_file: str = None):
        """Initialise the service manager.

        Args:
            config_file: Value passed straight to ``load_config``.
        """
        self.config = self.load_config(config_file)
        self.processes = {}
        self.state_file = "service_state.json"
        self.load_state()

    def load_config(self, config_file: str) -> dict:
        """Load the configuration file."""
        # TODO: implement configuration loading; the stub returns None.
        pass

    def load_state(self):
        """Load the process state."""
        # TODO: implement state loading.
        pass

    def save_state(self):
        """Save the process state."""
        # TODO: implement state saving.
        pass

3.4 第四步:实现核心操作

class ServiceManager:
    """Skeleton of the core operations; each method lists its planned steps."""

    def start_service(self, service_name: str) -> bool:
        """Start a service."""
        # 1. Validate the service configuration
        # 2. Check the service state
        # 3. Prepare the runtime environment
        # 4. Launch the process
        # 5. Update the state
        pass

    def stop_service(self, service_name: str) -> bool:
        """Stop a service."""
        # 1. Find the process
        # 2. Send the stop signal
        # 3. Wait for the process to exit
        # 4. Update the state
        pass

    def get_service_status(self, service_name: str) -> dict:
        """Return the status of a service."""
        # 1. Check the process state
        # 2. Check the port state
        # 3. Return the status information
        pass

3.5 第五步:实现命令行接口

def main():
    """Command-line entry point: parse arguments and dispatch to the manager.

    Exits through ``parser.error`` (status 2) when ``start`` is given neither
    a service name nor ``--all``.
    """
    import argparse

    parser = argparse.ArgumentParser(description="服务管理工具")
    parser.add_argument("action", choices=["start", "stop", "restart", "status"])
    parser.add_argument("service", nargs="?", help="服务名称")
    parser.add_argument("--all", action="store_true", help="操作所有服务")

    args = parser.parse_args()

    manager = ServiceManager()

    if args.action == "start":
        if args.all:
            manager.start_all()
        elif args.service:
            manager.start_service(args.service)
        else:
            # ``service`` is optional on the command line, so without this
            # branch start_service would be called with None.
            parser.error("请指定服务名称,或使用 --all")
    # ... other actions

4. 关键设计模式

4.1 状态机模式

class ServiceState:
    """String constants naming the lifecycle states of a service."""

    STOPPED = "stopped"
    STARTING = "starting"
    RUNNING = "running"
    STOPPING = "stopping"
    ERROR = "error"


class Service:
    """A service whose lifecycle is tracked as a small state machine."""

    def __init__(self):
        """Create the service in the STOPPED state."""
        self.state = ServiceState.STOPPED

    def start(self) -> bool:
        """Move the service to RUNNING.

        Returns:
            True if the service was started by this call, False if it was
            already running.
        """
        if self.state == ServiceState.RUNNING:
            return False

        self.state = ServiceState.STARTING
        # Startup logic goes here.
        self.state = ServiceState.RUNNING
        # Report success explicitly so both outcomes return a bool.
        return True

4.2 观察者模式

class ServiceObserver:
    """Base observer; override the callbacks for the events of interest."""

    def on_service_started(self, service_name: str):
        """Called after a service has started."""
        pass

    def on_service_stopped(self, service_name: str):
        """Called after a service has stopped."""
        pass

    def on_service_error(self, service_name: str, error: str):
        """Called when a service reports an error."""
        pass


class ServiceManager:
    """Keeps a list of observers and forwards service events to them."""

    def __init__(self):
        self.observers = []

    def add_observer(self, observer: ServiceObserver):
        """Register an observer; it is notified in registration order."""
        self.observers.append(observer)

    def notify_started(self, service_name: str):
        """Tell every observer that ``service_name`` has started."""
        for observer in self.observers:
            observer.on_service_started(service_name)

    def notify_stopped(self, service_name: str):
        """Tell every observer that ``service_name`` has stopped."""
        for observer in self.observers:
            observer.on_service_stopped(service_name)

    def notify_error(self, service_name: str, error: str):
        """Tell every observer that ``service_name`` failed with ``error``."""
        for observer in self.observers:
            observer.on_service_error(service_name, error)

4.3 工厂模式

class ServiceFactory:
    """Create service objects from a type name and a configuration dict."""

    @staticmethod
    def create_service(service_type: str, config: dict):
        """Build the service object matching ``service_type``.

        Args:
            service_type: One of ``"python"``, ``"node"`` or ``"java"``.
            config: Passed unchanged to the service constructor.

        Raises:
            ValueError: If ``service_type`` is not one of the known types.
        """
        if service_type == "python":
            return PythonService(config)
        elif service_type == "node":
            return NodeService(config)
        elif service_type == "java":
            return JavaService(config)
        else:
            raise ValueError(f"Unknown service type: {service_type}")

5. 扩展功能设计

5.1 配置文件支持

# config.yaml
# Each entry under `services` is keyed by service name.
services:
  web_server:
    command: "python app.py"
    port: 8000
    env: "web_env"
    auto_restart: true
    health_check: "http://localhost:8000/health"

  database:
    command: "mongod"
    port: 27017
    env: "db_env"
    auto_restart: false

5.2 健康检查

class HealthChecker:
    """Layered health check: process, then port, then optional HTTP endpoint.

    Relies on ``self.services`` and on the ``is_process_running``,
    ``is_port_listening`` and ``check_health_endpoint`` helpers, none of
    which are defined in this excerpt.
    """

    def check_service_health(self, service_name: str) -> bool:
        """Return True only if every applicable check passes.

        An unknown ``service_name`` is reported as unhealthy rather than
        raising ``KeyError``.
        """
        service = self.services.get(service_name)
        if service is None:
            return False

        # Check the process state.
        if not self.is_process_running(service['pid']):
            return False

        # Check the port state.
        if not self.is_port_listening(service['port']):
            return False

        # Check the health endpoint, when one is configured.
        if service.get('health_check'):
            return self.check_health_endpoint(service['health_check'])

        return True

5.3 自动重启

class AutoRestartManager:
    """Restart failed services, giving up after ``max_retries`` attempts.

    Relies on a ``restart_service`` method that is not defined in this
    excerpt.
    """

    def __init__(self):
        self.failed_services = {}  # service name -> number of failures seen
        self.max_retries = 3

    def handle_service_failure(self, service_name: str) -> bool:
        """Record one failure and restart the service if retries remain.

        Returns:
            True if a restart was attempted, False once the failure count
            exceeds ``max_retries``.
        """
        failures = self.failed_services.get(service_name, 0) + 1
        self.failed_services[service_name] = failures

        if failures <= self.max_retries:
            self.restart_service(service_name)
            return True

        return False

6. 测试策略

6.1 单元测试

import unittest
from unittest.mock import Mock, patch

class TestServiceManager(unittest.TestCase):
    """Unit tests for ServiceManager with process creation mocked out."""

    def setUp(self):
        self.manager = ServiceManager()

    @patch('subprocess.Popen')
    def test_start_service(self, mock_popen):
        """start_service returns a truthy result when Popen is mocked."""
        # Simulate a started process: poll() returning None means the
        # child has not exited.
        mock_process = Mock()
        mock_process.poll.return_value = None
        mock_process.pid = 12345
        mock_popen.return_value = mock_process

        result = self.manager.start_service("test_service")
        self.assertTrue(result)

6.2 集成测试

class IntegrationTest(unittest.TestCase):
    """End-to-end check of a start/stop cycle."""

    def setUp(self):
        # The test body uses self.manager, so it has to be created here.
        self.manager = ServiceManager()

    def test_full_lifecycle(self):
        """A service is running after start and not running after stop."""
        # Start the service.
        self.manager.start_service("test_service")
        self.assertTrue(self.manager.is_service_running("test_service"))

        # Stop the service.
        self.manager.stop_service("test_service")
        self.assertFalse(self.manager.is_service_running("test_service"))

7. 最佳实践总结

7.1 代码组织

service_manager/
├── __init__.py
├── core/
│   ├── __init__.py
│   ├── manager.py      # 核心管理类
│   ├── config.py       # 配置管理
│   └── state.py        # 状态管理
├── services/
│   ├── __init__.py
│   ├── base.py         # 服务基类
│   ├── python.py       # Python服务
│   └── node.py         # Node服务
├── utils/
│   ├── __init__.py
│   ├── process.py      # 进程工具
│   └── network.py      # 网络工具
├── cli.py              # 命令行接口
└── config.yaml         # 配置文件

7.2 错误处理

class ServiceError(Exception):
    """Base class for the service-manager exceptions."""


class ServiceNotFoundError(ServiceError):
    """Raised when the requested service name is not configured."""


class ServiceStartError(ServiceError):
    """Error type for failures while starting a service."""


class ServiceStopError(ServiceError):
    """Error type for failures while stopping a service."""

def start_service(self, service_name: str) -> bool:
    """Start a service, reporting failure as False instead of raising.

    Returns:
        True on success; False if the service is unknown or starting it
        raised an exception (the failure is logged).
    """
    try:
        if service_name not in self.services:
            raise ServiceNotFoundError(f"Service {service_name} not found")

        # Startup logic goes here.
        return True
    except ServiceError as e:
        # Expected, domain-level failure: the message alone is enough.
        logger.error(f"Failed to start service {service_name}: {e}")
        return False
    except Exception as e:
        # Unexpected failure: keep the traceback so it can be diagnosed.
        logger.exception(f"Failed to start service {service_name}: {e}")
        return False

7.3 日志管理

import logging
from logging.handlers import RotatingFileHandler

def setup_logging():
    """Configure and return the ``service_manager`` logger.

    The logger is set to INFO and writes to a rotating file
    (``service_manager.log``, 10 MB per file, 5 backups) and to the console.
    Calling this more than once returns the same logger without adding
    further handlers.
    """
    logger = logging.getLogger('service_manager')
    logger.setLevel(logging.INFO)

    # getLogger returns the same object on every call; adding handlers again
    # would make every message appear multiple times.
    if logger.handlers:
        return logger

    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
    )

    # File handler; UTF-8 so non-ASCII log messages are written reliably.
    file_handler = RotatingFileHandler(
        'service_manager.log',
        maxBytes=10 * 1024 * 1024,  # 10MB
        backupCount=5,
        encoding='utf-8',
    )

    # Console handler.
    console_handler = logging.StreamHandler()

    for handler in (file_handler, console_handler):
        handler.setFormatter(formatter)
        logger.addHandler(handler)

    return logger

通过这种系统性的设计,你可以构建出功能强大、易于维护的服务管理程序。关键是要理解每个组件的职责,以及它们之间如何协作。