开发环境与生产环境的差异:
用户规模:开发环境只有单个用户,生产环境要面对成百上千的用户。
安全要求:开发环境较为宽松,生产环境需要严格的安全策略。
性能要求:开发环境能响应即可,生产环境要求高速响应和高可用。
| 平台类型 | 代表服务 | 优点 | 缺点 | 适用场景 |
|---|---|---|---|---|
| 传统VPS | DigitalOcean, Linode | 完全控制,成本低 | 需要手动配置 | 中小型项目,有运维能力 |
| PaaS平台 | Heroku, Railway | 简单易用,自动扩展 | 成本较高,控制有限 | 快速原型,初创公司 |
| 容器平台 | Docker, Kubernetes | 高度可移植,弹性伸缩 | 学习曲线陡峭 | 微服务,大规模应用 |
| 云服务商 | AWS, GCP, Azure | 服务全面,全球覆盖 | 复杂,成本不可预测 | 企业级应用,全球业务 |
| Serverless | AWS Lambda, Vercel | 按需付费,自动扩展 | 冷启动延迟,限制多 | 事件驱动,流量波动大 |
在生产环境中必须禁用调试模式,否则会暴露敏感信息和安全漏洞。
# app/__init__.py - 确保生产环境配置正确
import os
from flask import Flask
def create_app():
    """Build and configure the Flask application.

    The configuration class is selected from the ``FLASK_ENV`` environment
    variable (``'development'`` when unset). After loading it, debug/testing
    are switched off, cookie security flags are set and JSON output options
    are applied before extensions and blueprints are registered.

    Returns:
        The configured ``Flask`` instance.

    Raises:
        ValueError: If ``FLASK_ENV`` is ``'production'`` and ``SECRET_KEY`` or
            ``DATABASE_URL`` is missing/empty in the loaded configuration.
    """
    app = Flask(__name__)

    # Load configuration according to the environment variable.
    env = os.environ.get('FLASK_ENV', 'development')
    if env == 'production':
        app.config.from_object('config.ProductionConfig')
        # Fail fast when a critical setting is absent.
        required_vars = ['SECRET_KEY', 'DATABASE_URL']
        for var in required_vars:
            if not app.config.get(var):
                raise ValueError(f"生产环境必须设置 {var}")
    else:
        app.config.from_object('config.DevelopmentConfig')

    # Disable debug output.
    app.debug = False
    app.config['DEBUG'] = False
    app.config['TESTING'] = False

    # Cookie hardening.
    # NOTE(review): these are applied for every environment; with
    # SESSION_COOKIE_SECURE=True the session cookie is not sent over plain
    # http://localhost - confirm this is intended for development.
    app.config['SESSION_COOKIE_SECURE'] = True  # HTTPS only
    app.config['REMEMBER_COOKIE_SECURE'] = True
    app.config['SESSION_COOKIE_HTTPONLY'] = True
    app.config['SESSION_COOKIE_SAMESITE'] = 'Lax'

    # JSON output tuning. The JSON_SORT_KEYS / JSONIFY_PRETTYPRINT_REGULAR
    # config keys were removed in Flask 2.3; the JSON provider attributes
    # are the supported replacement.
    app.json.sort_keys = False
    app.json.compact = True

    # Wire up extensions and blueprints.
    configure_extensions(app)
    register_blueprints(app)
    return app
# Create the application instance at import time.
app = create_app()

# Expose the debug route only outside production.
if os.environ.get('FLASK_ENV') != 'production':
    @app.route('/debug')
    def debug_info():
        """Return a placeholder debug string."""
        return "调试信息"
# requirements.txt - production dependencies (all versions pinned exactly)
# Core framework
Flask==2.3.3
Werkzeug==2.3.7
Jinja2==3.1.2
itsdangerous==2.1.2
click==8.1.3
# Database
SQLAlchemy==2.0.19
Flask-SQLAlchemy==3.0.5
psycopg2-binary==2.9.7 # PostgreSQL driver
redis==4.6.0
# WSGI server
gunicorn==21.2.0
gevent==23.9.1
# Security
Flask-Login==0.6.2
Flask-WTF==1.1.1
Flask-CORS==4.0.0
bcrypt==4.0.1
cryptography==41.0.5
# Utilities
python-dotenv==1.0.0
requests==2.31.0
celery==5.3.4
Flask-Mail==0.9.1
# Monitoring and logging
blinker==1.7.0
structlog==23.1.0
sentry-sdk[flask]==1.35.0
# For finer-grained control, use requirements.in together with pip-tools.
# Contents of requirements.in:
# Flask
# SQLAlchemy
# gunicorn
# redis
#
# Then run: pip-compile requirements.in --output-file requirements.txt
# Create layered dependency files
# requirements/
# ├── base.txt         # shared by every environment
# ├── production.txt   # production only
# └── development.txt  # development only

# The redirects below fail if the directory does not exist yet.
mkdir -p requirements

# base.txt
# The heredoc delimiter is quoted so the shell never expands the content.
cat > requirements/base.txt << 'EOF'
Flask>=2.3.0,<3.0.0
Werkzeug>=2.3.0,<3.0.0
Jinja2>=3.1.0,<4.0.0
itsdangerous>=2.1.0,<3.0.0
click>=8.0.0,<9.0.0
python-dotenv>=1.0.0,<2.0.0
EOF

# production.txt
cat > requirements/production.txt << 'EOF'
-r base.txt
gunicorn>=20.0.0,<22.0.0
gevent>=22.0.0,<24.0.0
psycopg2-binary>=2.9.0,<3.0.0
redis>=4.5.0,<5.0.0
sentry-sdk[flask]>=1.30.0,<2.0.0
EOF

# development.txt
cat > requirements/development.txt << 'EOF'
-r base.txt
pytest>=7.0.0,<8.0.0
pytest-cov>=4.0.0,<5.0.0
black>=23.0.0,<24.0.0
flake8>=6.0.0,<7.0.0
Flask-DebugToolbar>=0.13.0,<1.0.0
EOF

# Install commands
pip install -r requirements/production.txt   # production
pip install -r requirements/development.txt  # development
# .env.production - production environment variables (do NOT commit to version control)
# NOTE: keep comments on their own lines. Some consumers of this file
# (e.g. systemd EnvironmentFile=) treat a trailing "# ..." as part of the value.
# ============ Application ============
FLASK_ENV=production
FLASK_APP=app:create_app()
SECRET_KEY=your-production-secret-key-change-this
SECURITY_PASSWORD_SALT=your-salt-change-this
# ============ Database ============
DATABASE_URL=postgresql://username:password@localhost:5432/production_db
REDIS_URL=redis://localhost:6379/0
# ============ Mail ============
MAIL_SERVER=smtp.gmail.com
MAIL_PORT=587
MAIL_USE_TLS=true
MAIL_USERNAME=your-email@gmail.com
MAIL_PASSWORD=your-app-password
MAIL_DEFAULT_SENDER=noreply@example.com
# ============ Third-party services ============
STRIPE_SECRET_KEY=sk_live_xxxxxxxxxxxxxx
STRIPE_PUBLIC_KEY=pk_live_xxxxxxxxxxxxxx
GOOGLE_MAPS_API_KEY=AIzaSyxxxxxxxxxxxxxxxxxxxxxx
AWS_ACCESS_KEY_ID=AKIAxxxxxxxxxxxxxx
AWS_SECRET_ACCESS_KEY=xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
# ============ Monitoring and logging ============
SENTRY_DSN=https://xxxxxxxxxxxxxxxx@sentry.io/xxxxxxx
LOG_LEVEL=WARNING
LOG_FILE=/var/log/flask/app.log
# ============ Performance ============
WORKERS=4
THREADS=2
TIMEOUT=30
MAX_REQUESTS=1000
MAX_REQUESTS_JITTER=50
# ============ Security ============
SESSION_COOKIE_SECURE=true
REMEMBER_COOKIE_SECURE=true
CORS_ORIGINS=https://yourdomain.com
# ============ Application-specific ============
ITEMS_PER_PAGE=20
UPLOAD_FOLDER=/var/www/uploads
# 50 MB upload limit, in bytes
MAX_CONTENT_LENGTH=52428800
客户端:发起 HTTP/HTTPS 请求
Nginx:反向代理,静态文件,SSL
Gunicorn:WSGI服务器,进程管理
Flask 应用:业务逻辑处理
#!/bin/bash
# setup_server.sh - initial provisioning script for an Ubuntu/Debian server
#
# Usage: DB_PASSWORD='choose-a-strong-one' ./setup_server.sh
# NOTE(review): the deadsnakes PPA is Ubuntu-specific; confirm the Python
# install step on Debian.
set -euo pipefail

# Database password for the application role; override via the environment.
DB_PASSWORD="${DB_PASSWORD:-secure_password}"

# Update the system
sudo apt update
sudo apt upgrade -y

# Install base tools
sudo apt install -y curl wget git build-essential software-properties-common

# Install Python 3.11
sudo add-apt-repository ppa:deadsnakes/ppa -y
sudo apt update
sudo apt install -y python3.11 python3.11-venv python3.11-dev
# Do NOT repoint /usr/bin/python3 via update-alternatives: distro tooling
# (apt helpers, add-apt-repository, ...) depends on the system interpreter.
# Create the application virtualenv with `python3.11 -m venv` instead.

# Install PostgreSQL
sudo apt install -y postgresql postgresql-contrib libpq-dev

# Configure PostgreSQL. The role is created first so the database can be
# owned by it (ownership also grants rights on the public schema).
sudo -u postgres psql << EOF
CREATE USER flask_user WITH PASSWORD '${DB_PASSWORD}';
CREATE DATABASE flask_production OWNER flask_user;
ALTER ROLE flask_user SET client_encoding TO 'utf8';
ALTER ROLE flask_user SET default_transaction_isolation TO 'read committed';
ALTER ROLE flask_user SET timezone TO 'UTC';
GRANT ALL PRIVILEGES ON DATABASE flask_production TO flask_user;
\q
EOF

# Install Redis
sudo apt install -y redis-server
sudo systemctl enable redis-server
sudo systemctl start redis-server

# Install Nginx
sudo apt install -y nginx

# Create the application user (with a group of the same name) unless it
# already exists. `--user-group` is the useradd spelling; `--group` is not
# a valid useradd option.
if ! id -u flaskapp > /dev/null 2>&1; then
    sudo useradd --system --user-group --shell /bin/bash flaskapp
fi
sudo mkdir -p /var/www/flaskapp /var/www/flaskapp/uploads /var/log/gunicorn
sudo chown -R flaskapp:flaskapp /var/www/flaskapp /var/log/gunicorn

# Install system libraries needed to build image-processing packages
sudo apt install -y libjpeg-dev libpng-dev libfreetype6-dev

echo "服务器初始化完成!"
Gunicorn是一个成熟的Python WSGI HTTP服务器,与Flask完美集成,支持多进程和多线程。
# gunicorn_config.py - Gunicorn配置文件
import os
import multiprocessing
# Server socket and workers.
# Every tunable can be overridden through the environment (the names match
# the "performance" section of .env.production); defaults are unchanged.
# Default bind is loopback only, for use behind an Nginx reverse proxy;
# set GUNICORN_BIND=0.0.0.0:8000 when running inside a container.
bind = os.environ.get("GUNICORN_BIND", "127.0.0.1:8000")
workers = int(os.environ.get("WORKERS", multiprocessing.cpu_count() * 2 + 1))  # recommended formula
threads = int(os.environ.get("THREADS", 2))  # threads per worker (only used by the gthread worker)
worker_class = "gevent"  # asynchronous gevent worker
worker_connections = 1000  # max simultaneous clients per worker

# Timeouts
timeout = int(os.environ.get("TIMEOUT", 30))  # worker timeout in seconds
keepalive = 2  # seconds to wait for the next request on a keep-alive connection
graceful_timeout = 30  # seconds allowed for a graceful worker shutdown

# Logging ("-" sends the log to stdout/stderr)
accesslog = os.environ.get("GUNICORN_ACCESS_LOG", "/var/log/gunicorn/access.log")
errorlog = os.environ.get("GUNICORN_ERROR_LOG", "/var/log/gunicorn/error.log")
loglevel = "info"
access_log_format = '%(h)s %(l)s %(u)s %(t)s "%(r)s" %(s)s %(b)s "%(f)s" "%(a)s"'

# Process management
max_requests = int(os.environ.get("MAX_REQUESTS", 1000))  # restart a worker after this many requests
max_requests_jitter = int(os.environ.get("MAX_REQUESTS_JITTER", 50))  # random jitter so workers do not all restart together
preload_app = True  # load the application before forking workers

# Request size limits
limit_request_line = 4094  # max size of the HTTP request line
limit_request_fields = 100  # max number of request headers
limit_request_field_size = 8190  # max size of a single request header

# Performance tuning
backlog = 2048  # pending connection queue size
def when_ready(server):
    """Gunicorn hook: called just after the server has started.

    Args:
        server: The Gunicorn arbiter; only its ``log`` attribute is used.
    """
    server.log.info("Gunicorn服务器已启动")
def worker_int(worker):
    """Gunicorn hook: called just after a worker exited on SIGINT or SIGQUIT.

    Args:
        worker: The worker object; its ``log`` and ``pid`` are used.
    """
    worker.log.info("worker异常退出: %s", worker.pid)
def worker_abort(worker):
    """Gunicorn hook: called when a worker receives SIGABRT (typically on timeout).

    Args:
        worker: The worker object; its ``log`` and ``pid`` are used.
    """
    worker.log.info("worker被终止: %s", worker.pid)
# Create the systemd unit file
# /etc/systemd/system/flaskapp.service

# Directories listed in ReadWritePaths= must exist or the unit fails to start.
sudo install -d -o flaskapp -g flaskapp /var/www/flaskapp/uploads /var/log/gunicorn

# Written through `sudo tee` because /etc/systemd/system is root-owned.
# The delimiter is quoted so $MAINPID and the line-continuation backslashes
# reach the file verbatim.
sudo tee /etc/systemd/system/flaskapp.service > /dev/null << 'EOF'
[Unit]
Description=Flask Application with Gunicorn
After=network.target postgresql.service redis-server.service
Requires=postgresql.service redis-server.service

[Service]
User=flaskapp
Group=flaskapp
WorkingDirectory=/var/www/flaskapp
# venv first, then the standard system directories
Environment="PATH=/var/www/flaskapp/venv/bin:/usr/local/bin:/usr/bin:/bin"
Environment="FLASK_ENV=production"
EnvironmentFile=/var/www/flaskapp/.env.production
ExecStart=/var/www/flaskapp/venv/bin/gunicorn \
    --config /var/www/flaskapp/gunicorn_config.py \
    app:create_app()
# Graceful reload signal
ExecReload=/bin/kill -s HUP $MAINPID
# Graceful stop signal
ExecStop=/bin/kill -s TERM $MAINPID
# Restart automatically if the process crashes
Restart=on-failure
# Delay between restarts
RestartSec=10
# Sandboxing
PrivateTmp=true
ProtectSystem=full
NoNewPrivileges=true
ReadWritePaths=/var/www/flaskapp/uploads /var/log/gunicorn
# Resource limits
LimitNOFILE=65535
LimitNPROC=512

[Install]
WantedBy=multi-user.target
EOF

# Enable and start the service
sudo systemctl daemon-reload
sudo systemctl enable flaskapp
sudo systemctl start flaskapp
sudo systemctl status flaskapp
# /etc/nginx/sites-available/flaskapp
server {
    listen 80;
    server_name yourdomain.com www.yourdomain.com;

    # Redirect HTTP to HTTPS
    return 301 https://$server_name$request_uri;
}

server {
    listen 443 ssl http2;
    server_name yourdomain.com www.yourdomain.com;

    # SSL certificates
    ssl_certificate /etc/letsencrypt/live/yourdomain.com/fullchain.pem;
    ssl_certificate_key /etc/letsencrypt/live/yourdomain.com/privkey.pem;

    # SSL tuning
    ssl_protocols TLSv1.2 TLSv1.3;
    # AES-GCM suites are paired with SHA256/SHA384; "...-GCM-SHA512" names
    # do not exist and would leave TLS 1.2 without any usable cipher.
    ssl_ciphers ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305;
    ssl_prefer_server_ciphers off;
    ssl_session_cache shared:SSL:10m;
    ssl_session_timeout 10m;

    # Security headers
    add_header X-Frame-Options "SAMEORIGIN" always;
    add_header X-XSS-Protection "1; mode=block" always;
    add_header X-Content-Type-Options "nosniff" always;
    add_header Referrer-Policy "no-referrer-when-downgrade" always;
    add_header Content-Security-Policy "default-src 'self' http: https: data: blob: 'unsafe-inline'" always;

    # Upload size limit
    client_max_body_size 50M;

    # Static files. "^~" stops nginx from letting the regex location below
    # take over requests such as /static/app.css.
    location ^~ /static/ {
        alias /var/www/flaskapp/app/static/;
        expires 1y;
        add_header Cache-Control "public, immutable";
        # Serve pre-compressed .gz files when present
        gzip_static on;
        gzip_types text/plain text/css application/json application/javascript text/xml application/xml application/xml+rss text/javascript;
    }

    location ^~ /uploads/ {
        alias /var/www/flaskapp/uploads/;
        expires 30d;
        add_header Cache-Control "public";
        # Only reachable through internal redirects
        internal;
    }

    # Asset-like URLs outside /static/ and /uploads/ (e.g. /favicon.ico).
    # This location has no root/alias of its own, so the request is passed
    # to the application and only the cache headers are added here.
    location ~* \.(jpg|jpeg|png|gif|ico|css|js)$ {
        expires 7d;
        add_header Cache-Control "public, max-age=604800";
        proxy_pass http://127.0.0.1:8000;
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;
    }

    # Reverse proxy to Gunicorn
    location / {
        proxy_pass http://127.0.0.1:8000;

        # Forwarded headers
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;
        proxy_set_header X-Forwarded-Host $server_name;

        # Timeouts
        proxy_connect_timeout 75s;
        proxy_send_timeout 3600s;
        proxy_read_timeout 3600s;

        # Buffering
        proxy_buffering on;
        proxy_buffer_size 128k;
        proxy_buffers 256 16k;
        proxy_busy_buffers_size 256k;
        proxy_temp_file_write_size 256k;

        # WebSocket support
        proxy_http_version 1.1;
        proxy_set_header Upgrade $http_upgrade;
        proxy_set_header Connection "upgrade";
    }

    # Health-check endpoint
    location /health {
        proxy_pass http://127.0.0.1:8000;
        access_log off;
    }

    # Deny access to hidden files (except /.well-known)
    location ~ /\.(?!well-known) {
        deny all;
    }

    location ~ /\.ht {
        deny all;
    }

    # Error pages
    error_page 500 502 503 504 /50x.html;
    location = /50x.html {
        root /usr/share/nginx/html;
    }
}
# Set up the TLS certificate (Let's Encrypt)
sudo apt install -y certbot python3-certbot-nginx

# Obtain the certificate only ("certonly"): the site configuration already
# references the certificate paths, so the files must exist before the site
# is enabled, and certbot does not need to edit the configuration.
sudo certbot certonly --nginx -d yourdomain.com -d www.yourdomain.com

# Enable the site (-f keeps the step repeatable if the link already exists)
sudo ln -sf /etc/nginx/sites-available/flaskapp /etc/nginx/sites-enabled/
sudo nginx -t # validate the configuration
sudo systemctl reload nginx

# Test automatic renewal
sudo certbot renew --dry-run
减少镜像体积,提高安全性,优化构建缓存,适合生产环境。
# Dockerfile - production image (multi-stage build)

# Stage 1: build
FROM python:3.11-slim AS builder
WORKDIR /app

# Build-time system dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \
    gcc \
    g++ \
    libpq-dev \
    libjpeg-dev \
    libpng-dev \
    libfreetype6-dev \
    && rm -rf /var/lib/apt/lists/*

# Create the virtual environment
RUN python -m venv /opt/venv
ENV PATH="/opt/venv/bin:$PATH"

# Install Python dependencies (production.txt includes base.txt via -r)
COPY requirements/ requirements/
RUN pip install --no-cache-dir -r requirements/production.txt

# Stage 2: runtime
FROM python:3.11-slim

# Runtime system dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \
    libpq5 \
    libjpeg62-turbo \
    libpng16-16 \
    libfreetype6 \
    curl \
    && rm -rf /var/lib/apt/lists/*

# Non-root user with a fixed UID/GID so orchestrators that pin a numeric
# user (e.g. runAsUser: 1000) run as the owner of /app.
RUN groupadd -g 1000 flask \
    && useradd -u 1000 -g flask --no-create-home --shell /usr/sbin/nologin flask

# Application directory
WORKDIR /app

# Copy the virtual environment from the build stage
COPY --from=builder /opt/venv /opt/venv
ENV PATH="/opt/venv/bin:$PATH"

# Copy the application code
COPY . .

# Ownership
RUN chown -R flask:flask /app
USER flask

# Runtime directories
RUN mkdir -p logs uploads static

# Environment
ENV FLASK_APP=app:create_app()
ENV FLASK_ENV=production
ENV PYTHONUNBUFFERED=1
ENV PYTHONPATH=/app

# Exposed port
EXPOSE 8000

# Health check
HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
    CMD curl -f http://localhost:8000/health || exit 1

# Start command. Command-line flags take precedence over the config file:
# inside a container Gunicorn must listen on all interfaces (the config
# file binds to loopback) and log to stdout/stderr (the config file points
# at /var/log/gunicorn, which does not exist in this image).
CMD ["gunicorn", "--config", "gunicorn_config.py", "--bind", "0.0.0.0:8000", "--access-logfile", "-", "--error-logfile", "-", "app:create_app()"]
# docker-compose.prod.yml
version: '3.8'

services:
  web:
    build:
      context: .
      dockerfile: Dockerfile
      args:
        - ENVIRONMENT=production
    image: yourregistry.com/flaskapp:${TAG:-latest}
    container_name: flaskapp-web
    restart: unless-stopped
    depends_on:
      db:
        condition: service_healthy
      redis:
        condition: service_healthy
    # Listen on all container interfaces so the nginx service can reach
    # web:8000, and log to stdout/stderr (collected by the json-file driver).
    command:
      - gunicorn
      - --config
      - gunicorn_config.py
      - --bind
      - 0.0.0.0:8000
      - --access-logfile
      - "-"
      - --error-logfile
      - "-"
      - app:create_app()
    environment:
      - FLASK_ENV=production
      - DATABASE_URL=postgresql://${DB_USER}:${DB_PASSWORD}@db:5432/${DB_NAME}
      - REDIS_URL=redis://redis:6379/0
      - SECRET_KEY=${SECRET_KEY}
      - MAIL_USERNAME=${MAIL_USERNAME}
      - MAIL_PASSWORD=${MAIL_PASSWORD}
    env_file:
      - .env.production
    volumes:
      - uploads:/app/uploads
      - logs:/app/logs
      - static:/app/app/static
    ports:
      # Host-local only; public traffic goes through the nginx service.
      - "127.0.0.1:8000:8000"
    networks:
      - app-network
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 40s
    deploy:
      resources:
        limits:
          memory: 512M
        reservations:
          memory: 256M
    logging:
      driver: "json-file"
      options:
        max-size: "10m"
        max-file: "3"

  db:
    image: postgres:15-alpine
    container_name: flaskapp-db
    restart: unless-stopped
    environment:
      POSTGRES_DB: ${DB_NAME}
      POSTGRES_USER: ${DB_USER}
      POSTGRES_PASSWORD: ${DB_PASSWORD}
    volumes:
      - postgres_data:/var/lib/postgresql/data
      - ./docker/postgres/init.sql:/docker-entrypoint-initdb.d/init.sql
    ports:
      # Bound to loopback: a plain "5432:5432" publishes the database on
      # every host interface.
      - "127.0.0.1:5432:5432"
    networks:
      - app-network
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U ${DB_USER}"]
      interval: 10s
      timeout: 5s
      retries: 5
    command: >
      postgres
      -c max_connections=100
      -c shared_buffers=256MB
      -c effective_cache_size=1GB

  redis:
    image: redis:7-alpine
    container_name: flaskapp-redis
    restart: unless-stopped
    command: redis-server --appendonly yes --maxmemory 256mb --maxmemory-policy allkeys-lru
    volumes:
      - redis_data:/data
    ports:
      # Bound to loopback: this Redis has no password configured.
      - "127.0.0.1:6379:6379"
    networks:
      - app-network
    healthcheck:
      test: ["CMD", "redis-cli", "ping"]
      interval: 10s
      timeout: 3s
      retries: 3

  nginx:
    image: nginx:alpine
    container_name: flaskapp-nginx
    restart: unless-stopped
    depends_on:
      - web
    environment:
      - DOMAIN_NAME=${DOMAIN_NAME}
    # nginx does not expand ${DOMAIN_NAME} itself, so the mounted file is
    # treated as a template and rendered with envsubst before nginx starts.
    # Only DOMAIN_NAME is substituted, leaving nginx variables such as $host
    # untouched ("$$" is Compose's escape for a literal "$").
    command:
      - /bin/sh
      - -c
      - "envsubst '$$DOMAIN_NAME' < /etc/nginx/nginx.conf.template > /etc/nginx/nginx.conf && exec nginx -g 'daemon off;'"
    ports:
      - "80:80"
      - "443:443"
    volumes:
      - ./docker/nginx/nginx.conf:/etc/nginx/nginx.conf.template:ro
      - ./docker/nginx/ssl:/etc/nginx/ssl
      # Shared with certbot so the ACME HTTP-01 challenge files can be served
      - ./docker/nginx/webroot:/var/www/certbot:ro
      - static:/var/www/static:ro
      - uploads:/var/www/uploads:ro
      - ./logs/nginx:/var/log/nginx
    networks:
      - app-network
    healthcheck:
      test: ["CMD", "nginx", "-t"]
      interval: 30s
      timeout: 10s
      retries: 3

  certbot:
    image: certbot/certbot
    container_name: flaskapp-certbot
    volumes:
      - ./docker/nginx/ssl:/etc/letsencrypt
      - ./docker/nginx/webroot:/var/www/certbot
    depends_on:
      - nginx
    networks:
      - app-network
    # --keep-until-expiring instead of --force-renewal: forcing a new
    # certificate on every start quickly runs into Let's Encrypt rate limits.
    command: certonly --webroot -w /var/www/certbot --keep-until-expiring --non-interactive --email ${CERTBOT_EMAIL} -d ${DOMAIN_NAME} --agree-tos

volumes:
  postgres_data:
    driver: local
  redis_data:
    driver: local
  uploads:
    driver: local
  logs:
    driver: local
  static:
    driver: local

networks:
  app-network:
    driver: bridge
    ipam:
      config:
        - subnet: 172.20.0.0/16
# docker/nginx/nginx.conf
# NOTE: ${DOMAIN_NAME} is a template placeholder. nginx does not expand
# environment variables in its configuration, so this file must be rendered
# (e.g. with `envsubst '$DOMAIN_NAME'`) before nginx loads it.
events {
    worker_connections 1024;
}

http {
    include /etc/nginx/mime.types;
    default_type application/octet-stream;

    # Log format
    log_format main '$remote_addr - $remote_user [$time_local] "$request" '
                    '$status $body_bytes_sent "$http_referer" '
                    '"$http_user_agent" "$http_x_forwarded_for"';
    access_log /var/log/nginx/access.log main;
    error_log /var/log/nginx/error.log warn;

    # Performance
    sendfile on;
    tcp_nopush on;
    tcp_nodelay on;
    keepalive_timeout 65;
    types_hash_max_size 2048;
    client_max_body_size 50M;

    # Gzip compression
    gzip on;
    gzip_vary on;
    gzip_min_length 1024;
    gzip_proxied any;
    gzip_comp_level 6;
    gzip_types text/plain text/css text/xml text/javascript
               application/json application/javascript application/xml+rss
               application/xml application/xhtml+xml image/svg+xml;

    # Send "Connection: upgrade" only for WebSocket handshakes. Ordinary
    # requests get an empty Connection header, which is what lets the
    # upstream keepalive pool below be reused.
    map $http_upgrade $connection_upgrade {
        default upgrade;
        ''      '';
    }

    # Upstream application
    upstream flaskapp {
        server web:8000;
        keepalive 32;
    }

    # HTTP server (redirects to HTTPS)
    server {
        listen 80;
        server_name ${DOMAIN_NAME} www.${DOMAIN_NAME};

        # Let's Encrypt HTTP-01 challenge
        location /.well-known/acme-challenge/ {
            root /var/www/certbot;
        }

        # Redirect everything else to HTTPS
        location / {
            return 301 https://$server_name$request_uri;
        }
    }

    # HTTPS server
    server {
        listen 443 ssl http2;
        server_name ${DOMAIN_NAME} www.${DOMAIN_NAME};

        # SSL certificates
        ssl_certificate /etc/nginx/ssl/live/${DOMAIN_NAME}/fullchain.pem;
        ssl_certificate_key /etc/nginx/ssl/live/${DOMAIN_NAME}/privkey.pem;

        # SSL tuning
        ssl_protocols TLSv1.2 TLSv1.3;
        # AES-GCM suites are paired with SHA256/SHA384; "...-GCM-SHA512"
        # names do not exist and would leave TLS 1.2 without a usable cipher.
        ssl_ciphers ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305;
        ssl_prefer_server_ciphers off;
        ssl_session_cache shared:SSL:10m;
        ssl_session_timeout 10m;
        ssl_session_tickets off;

        # Security headers
        add_header Strict-Transport-Security "max-age=31536000; includeSubDomains" always;
        add_header X-Frame-Options "SAMEORIGIN" always;
        add_header X-Content-Type-Options "nosniff" always;
        add_header X-XSS-Protection "1; mode=block" always;
        add_header Referrer-Policy "strict-origin-when-cross-origin" always;

        # Static files
        location /static/ {
            alias /var/www/static/;
            expires 1y;
            add_header Cache-Control "public, immutable";
        }

        location /uploads/ {
            alias /var/www/uploads/;
            expires 30d;
            add_header Cache-Control "public";
            # Only reachable through internal redirects
            internal;
        }

        # Reverse proxy to the Flask application
        location / {
            proxy_pass http://flaskapp;
            proxy_set_header Host $host;
            proxy_set_header X-Real-IP $remote_addr;
            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
            proxy_set_header X-Forwarded-Proto $scheme;
            proxy_set_header X-Forwarded-Host $server_name;
            proxy_connect_timeout 75s;
            proxy_send_timeout 3600s;
            proxy_read_timeout 3600s;
            proxy_buffer_size 128k;
            proxy_buffers 256 16k;
            proxy_busy_buffers_size 256k;
            proxy_http_version 1.1;
            proxy_set_header Upgrade $http_upgrade;
            proxy_set_header Connection $connection_upgrade;
        }

        # Health check
        location /health {
            proxy_pass http://flaskapp;
            access_log off;
        }
    }
}
# Procfile - Heroku process file
# The factory call is quoted: the command is run by a shell, where bare
# parentheses are a syntax error.
web: gunicorn "app:create_app()"
# runtime.txt - Python version
python-3.11.5
# requirements.txt - 依赖文件(Heroku自动检测)
# app.json - Heroku应用描述
{
  "name": "Flask Application",
  "description": "A production-ready Flask application",
  "repository": "https://github.com/yourusername/flaskapp",
  "keywords": ["python", "flask", "gunicorn"],
  "env": {
    "FLASK_ENV": {
      "value": "production"
    },
    "SECRET_KEY": {
      "generator": "secret"
    }
  },
  "addons": [
    "heroku-postgresql:essential-0",
    "heroku-redis:mini"
  ],
  "buildpacks": [
    {
      "url": "heroku/python"
    }
  ]
}
# Deployment commands
# 1. Install the Heroku CLI
curl https://cli-assets.heroku.com/install.sh | sh
# 2. Log in
heroku login
# 3. Create the app and its add-ons
# (the free "hobby-dev" plans have been retired; check `heroku addons:plans`
# for the plans currently offered)
heroku create flaskapp-production
heroku addons:create heroku-postgresql:essential-0
heroku addons:create heroku-redis:mini
# 4. Set environment variables
heroku config:set FLASK_ENV=production
heroku config:set SECRET_KEY="$(openssl rand -base64 32)"
# DATABASE_URL and REDIS_URL are set and managed by the add-ons above;
# they must not be re-set by hand. Inspect them with:
heroku config:get DATABASE_URL
heroku config:get REDIS_URL
# 5. Deploy the code
git push heroku main
# 6. Tail the logs
heroku logs --tail
# 7. Open the app
heroku open
# .ebextensions/01-packages.config - install system packages
packages:
  yum:
    postgresql-devel: []
    gcc: []
    python3-devel: []
# .ebextensions/02-python.config - Python settings
option_settings:
  aws:elasticbeanstalk:application:environment:
    FLASK_ENV: production
    PYTHONPATH: /var/app/current:$PYTHONPATH
  aws:elasticbeanstalk:container:python:
    # module:callable - application.py exposes the WSGI object as `application`
    WSGIPath: application:application
    NumProcesses: 3
    NumThreads: 20
# .ebextensions/03-nginx.config - Nginx settings
files:
  "/etc/nginx/conf.d/proxy.conf":
    mode: "000644"
    owner: root
    group: root
    content: |
      client_max_body_size 50M;
# requirements.txt - 依赖文件
# application.py - 应用入口
from app import create_app
# WSGI callable, built from the app factory at import time.
application = create_app()

if __name__ == "__main__":
    # Run Flask's built-in server when this file is executed directly.
    application.run()
# Deployment commands
# 1. Install the EB CLI
pip install awsebcli
# 2. Initialise the project
eb init -p python-3.11 flaskapp
# 3. Create the environment
# Secrets are read from the calling shell (export DB_PASSWORD and SECRET_KEY
# first) instead of being written into the command line.
# --vpc.elbpublic is omitted: it configures a load balancer and cannot be
# combined with --single.
eb create flaskapp-production \
    --single \
    --instance-types t3.small \
    --database \
    --database.username flaskuser \
    --database.password "${DB_PASSWORD:?export DB_PASSWORD first}" \
    --database.engine postgres \
    --envvars "FLASK_ENV=production,SECRET_KEY=${SECRET_KEY:?export SECRET_KEY first}"
# 4. Deploy
eb deploy
# 5. Check status and logs
eb status
eb logs
# Dockerfile for Cloud Run
FROM python:3.11-slim
WORKDIR /app

# Install dependencies (production.txt includes base.txt via -r)
COPY requirements/ requirements/
RUN pip install --no-cache-dir -r requirements/production.txt gunicorn

# Copy the application code
COPY . .

# Environment
ENV FLASK_ENV=production
ENV PORT=8080

# Run the application.
# Shell form is kept so $PORT is expanded; the factory call is quoted because
# bare parentheses are a syntax error for /bin/sh.
CMD exec gunicorn --bind :$PORT --workers 1 --threads 8 --timeout 0 "app:create_app()"
# cloudbuild.yaml - Cloud Build configuration
steps:
  # Build the Docker image
  - name: 'gcr.io/cloud-builders/docker'
    args: ['build', '-t', 'gcr.io/$PROJECT_ID/flaskapp:$COMMIT_SHA', '.']

  # Push to Container Registry
  - name: 'gcr.io/cloud-builders/docker'
    args: ['push', 'gcr.io/$PROJECT_ID/flaskapp:$COMMIT_SHA']

  # Deploy to Cloud Run
  - name: 'gcr.io/cloud-builders/gcloud'
    args:
      - 'run'
      - 'deploy'
      - 'flaskapp'
      - '--image'
      - 'gcr.io/$PROJECT_ID/flaskapp:$COMMIT_SHA'
      - '--platform'
      - 'managed'
      - '--region'
      - 'us-central1'
      - '--allow-unauthenticated'
      - '--memory'
      - '512Mi'
      - '--cpu'
      - '1'
      - '--max-instances'
      - '10'
      - '--set-env-vars'
      - 'FLASK_ENV=production'
      # Let Cloud Run read SECRET_KEY from Secret Manager at runtime. A
      # build-time "${_SECRET_KEY}" is a user substitution, not a secret
      # reference, and would also bake the value into the service config.
      - '--set-secrets'
      - 'SECRET_KEY=SECRET_KEY:latest'

images:
  - 'gcr.io/$PROJECT_ID/flaskapp:$COMMIT_SHA'
自动扩展、自我修复、服务发现、负载均衡,适合大规模生产环境。
# k8s/namespace.yaml
apiVersion: v1
kind: Namespace
metadata:
  name: flaskapp
  labels:
    name: flaskapp
# k8s/configmap.yaml
apiVersion: v1
kind: ConfigMap
metadata:
  name: flaskapp-config
  namespace: flaskapp
data:
  # Application settings
  FLASK_ENV: "production"
  LOG_LEVEL: "INFO"
  CORS_ORIGINS: "https://yourdomain.com"
  # Database settings
  DB_HOST: "postgres-service"
  DB_PORT: "5432"
  DB_NAME: "flaskapp_production"
  # Redis settings
  REDIS_HOST: "redis-service"
  REDIS_PORT: "6379"
  # Gunicorn settings
  WORKERS: "4"
  THREADS: "2"
  TIMEOUT: "30"
# k8s/secret.yaml
# The values below are placeholders; do not commit real credentials.
apiVersion: v1
kind: Secret
metadata:
  name: flaskapp-secrets
  namespace: flaskapp
type: Opaque
stringData:
  SECRET_KEY: "production-secret-key"
  DB_USER: "flaskuser"
  DB_PASSWORD: "production-db-password"
  MAIL_PASSWORD: "mail-app-password"
  STRIPE_SECRET_KEY: "sk_live_xxxxxxxxxxxxxx"
# k8s/deployment.yaml
# NOTE(review): the uploads volume refers to the claim "flaskapp-uploads-pvc";
# that claim must be created separately, and with 3 replicas it presumably
# needs a ReadWriteMany access mode - confirm against the storage class.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: flaskapp-deployment
  namespace: flaskapp
  labels:
    app: flaskapp
    tier: web
spec:
  replicas: 3
  selector:
    matchLabels:
      app: flaskapp
      tier: web
  strategy:
    type: RollingUpdate
    rollingUpdate:
      maxSurge: 1
      maxUnavailable: 0
  template:
    metadata:
      labels:
        app: flaskapp
        tier: web
    spec:
      containers:
        - name: flaskapp
          image: yourregistry.com/flaskapp:latest
          imagePullPolicy: Always
          # Listen on all interfaces so the kubelet probes and the Service can
          # reach the pod, and log to stdout/stderr because the root
          # filesystem is read-only.
          command:
            - gunicorn
            - --config
            - gunicorn_config.py
            - --bind
            - 0.0.0.0:8000
            - --access-logfile
            - "-"
            - --error-logfile
            - "-"
            - app:create_app()
          ports:
            - containerPort: 8000
              name: http
          env:
            - name: SECRET_KEY
              valueFrom:
                secretKeyRef:
                  name: flaskapp-secrets
                  key: SECRET_KEY
            # $(VAR) references are resolved from the variables loaded via envFrom
            - name: DATABASE_URL
              value: "postgresql://$(DB_USER):$(DB_PASSWORD)@$(DB_HOST):$(DB_PORT)/$(DB_NAME)"
          envFrom:
            - configMapRef:
                name: flaskapp-config
            - secretRef:
                name: flaskapp-secrets
          resources:
            requests:
              memory: "256Mi"
              cpu: "250m"
            limits:
              memory: "512Mi"
              cpu: "500m"
          livenessProbe:
            httpGet:
              path: /health
              port: 8000
              scheme: HTTP
            initialDelaySeconds: 30
            periodSeconds: 10
            timeoutSeconds: 5
            failureThreshold: 3
          readinessProbe:
            httpGet:
              path: /health
              port: 8000
              scheme: HTTP
            initialDelaySeconds: 5
            periodSeconds: 5
            timeoutSeconds: 3
            failureThreshold: 1
          volumeMounts:
            - name: uploads-volume
              mountPath: /app/uploads
            - name: logs-volume
              mountPath: /app/logs
            # Writable scratch space: required with readOnlyRootFilesystem
            # (Gunicorn keeps its worker heartbeat files in a temp directory)
            - name: tmp-volume
              mountPath: /tmp
          securityContext:
            runAsUser: 1000
            runAsGroup: 1000
            readOnlyRootFilesystem: true
            allowPrivilegeEscalation: false
      volumes:
        - name: uploads-volume
          persistentVolumeClaim:
            claimName: flaskapp-uploads-pvc
        - name: logs-volume
          emptyDir: {}
        - name: tmp-volume
          emptyDir: {}
      securityContext:
        runAsNonRoot: true
        # Make mounted volumes group-writable for the container's GID
        fsGroup: 1000
        seccompProfile:
          type: RuntimeDefault
# k8s/service.yaml
apiVersion: v1
kind: Service
metadata:
  name: flaskapp-service
  namespace: flaskapp
  labels:
    app: flaskapp
spec:
  selector:
    app: flaskapp
    tier: web
  ports:
    - port: 80
      targetPort: 8000
      protocol: TCP
      name: http
  type: ClusterIP
# k8s/ingress.yaml
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: flaskapp-ingress
  namespace: flaskapp
  annotations:
    nginx.ingress.kubernetes.io/ssl-redirect: "true"
    nginx.ingress.kubernetes.io/force-ssl-redirect: "true"
    nginx.ingress.kubernetes.io/proxy-body-size: "50m"
    nginx.ingress.kubernetes.io/proxy-read-timeout: "3600"
    nginx.ingress.kubernetes.io/proxy-send-timeout: "3600"
    cert-manager.io/cluster-issuer: "letsencrypt-prod"
spec:
  # Replaces the deprecated kubernetes.io/ingress.class annotation
  ingressClassName: nginx
  tls:
    - hosts:
        - yourdomain.com
      secretName: flaskapp-tls
  rules:
    - host: yourdomain.com
      http:
        paths:
          - path: /
            pathType: Prefix
            backend:
              service:
                name: flaskapp-service
                port:
                  number: 80
# k8s/hpa.yaml - horizontal pod autoscaling
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: flaskapp-hpa
  namespace: flaskapp
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: flaskapp-deployment
  minReplicas: 3
  maxReplicas: 10
  metrics:
    - type: Resource
      resource:
        name: cpu
        target:
          type: Utilization
          averageUtilization: 70
    - type: Resource
      resource:
        name: memory
        target:
          type: Utilization
          averageUtilization: 80
没有监控的应用就像在黑暗中开车,你不知道何时会出问题。
# monitoring.py - 应用监控配置
import logging
from logging.handlers import RotatingFileHandler, SMTPHandler
import sentry_sdk
from sentry_sdk.integrations.flask import FlaskIntegration
from prometheus_flask_exporter import PrometheusMetrics
def setup_monitoring(app):
    """Configure error reporting, metrics, logging and health endpoints.

    Reads ``SENTRY_DSN``, ``ENVIRONMENT``, ``VERSION``, ``LOG_FILE``,
    ``LOG_LEVEL``, ``ADMIN_EMAIL`` and the ``MAIL_*`` keys from
    ``app.config`` and registers the ``/health`` and ``/metrics`` routes.

    Args:
        app: The Flask application to instrument.
    """
    # Imported here because this module's top-level imports do not provide them.
    from datetime import datetime, timezone

    from flask import jsonify, request

    # === Sentry error monitoring ===
    if app.config.get('SENTRY_DSN'):
        sentry_sdk.init(
            dsn=app.config['SENTRY_DSN'],
            integrations=[FlaskIntegration()],
            environment=app.config.get('ENVIRONMENT', 'production'),
            release=app.config.get('VERSION', '1.0.0'),
            traces_sample_rate=0.1,
            profiles_sample_rate=0.1,
        )
        app.logger.info('Sentry监控已启用')

    # === Prometheus metrics ===
    # path=None keeps the exporter from registering its own unauthenticated
    # /metrics endpoint, which would otherwise take precedence over the
    # access-controlled view registered further down.
    exporter = PrometheusMetrics(app, path=None)

    # Unknown or lower-case level names fall back to INFO instead of raising.
    log_level = getattr(
        logging, str(app.config.get('LOG_LEVEL', 'INFO')).upper(), logging.INFO
    )

    # === Logging ===
    if not app.debug:
        # Rotating file log
        file_handler = RotatingFileHandler(
            app.config.get('LOG_FILE', 'app.log'),
            maxBytes=10 * 1024 * 1024,  # 10MB
            backupCount=10,
        )
        file_handler.setLevel(log_level)
        file_handler.setFormatter(logging.Formatter(
            '%(asctime)s %(levelname)s: %(message)s [in %(pathname)s:%(lineno)d]'
        ))
        app.logger.addHandler(file_handler)

        # E-mail notification for errors
        if app.config.get('MAIL_SERVER') and app.config.get('ADMIN_EMAIL'):
            mail_handler = SMTPHandler(
                mailhost=(app.config['MAIL_SERVER'], app.config['MAIL_PORT']),
                fromaddr=app.config['MAIL_DEFAULT_SENDER'],
                toaddrs=[app.config['ADMIN_EMAIL']],
                subject='应用错误通知',
                credentials=(app.config['MAIL_USERNAME'], app.config['MAIL_PASSWORD']),
                secure=() if app.config.get('MAIL_USE_TLS') else None,
            )
            mail_handler.setLevel(logging.ERROR)
            app.logger.addHandler(mail_handler)

    app.logger.setLevel(log_level)

    # Health-check endpoint
    # NOTE(review): check_disk_space is not defined in the visible code -
    # confirm it is provided elsewhere in this module.
    @app.route('/health')
    def health_check():
        """Report the dependency checks; 200 when all pass, otherwise 503."""
        checks = {
            'database': check_database(),
            'redis': check_redis(),
            'disk_space': check_disk_space(),
        }
        status = 200 if all(checks.values()) else 503
        return jsonify({
            'status': 'healthy' if status == 200 else 'unhealthy',
            'timestamp': datetime.now(timezone.utc).isoformat(),
            'checks': checks,
        }), status

    # Metrics endpoint
    # NOTE(review): login_required, admin_required and generate_latest are
    # not imported in the visible code - confirm where they come from.
    @app.route('/metrics')
    @login_required
    @admin_required
    def metrics():
        """Prometheus metrics endpoint."""
        return generate_latest()

    # Custom per-endpoint request counter. `counter()` returns a wrapper
    # rather than a metric object, so it is applied to the views with
    # register_default() instead of calling .inc() by hand. This is done
    # last because only routes that already exist are wrapped; routes
    # registered after this call are presumably not counted - verify.
    exporter.register_default(
        exporter.counter(
            'flaskapp_requests_total',
            'Total number of requests',
            labels={'endpoint': lambda: request.endpoint},
        )
    )
def check_database():
    """Check the database connection.

    Returns:
        True if a trivial ``SELECT 1`` succeeds, False otherwise (the error
        is logged).
    """
    from flask import current_app
    from sqlalchemy import text

    try:
        # SQLAlchemy 2.0 rejects plain strings; textual SQL must be wrapped
        # in text().
        db.session.execute(text('SELECT 1'))
        return True
    except Exception as e:
        # Deliberately broad: any failure means "unhealthy".
        current_app.logger.error(f'数据库检查失败: {e}')
        return False
def check_redis():
    """Check the Redis connection.

    Returns:
        True if ``redis_client.ping()`` succeeds, False otherwise (the error
        is logged).
    """
    from flask import current_app

    try:
        redis_client.ping()
        return True
    except Exception as e:
        # Deliberately broad: any failure means "unhealthy".
        current_app.logger.error(f'Redis检查失败: {e}')
        return False
#!/bin/bash
# backup.sh - back up the database, uploaded files and logs
#
# Stops at the first failing command so a failed dump is never followed by
# the rotation step that deletes older backups.
# NOTE(review): pg_dump runs non-interactively here, so the password
# presumably comes from ~/.pgpass or PGPASSWORD - confirm.
set -euo pipefail

# Settings
BACKUP_DIR="/var/backups/flaskapp"
DATE=$(date +%Y%m%d_%H%M%S)
RETENTION_DAYS=30

# Create the backup directory
mkdir -p "$BACKUP_DIR"

# Database backup
DB_BACKUP_FILE="$BACKUP_DIR/db_backup_$DATE.sql"
echo "开始数据库备份..."
pg_dump -h localhost -U flask_user flask_production > "$DB_BACKUP_FILE"
# Compress the dump (produces $DB_BACKUP_FILE.gz)
gzip "$DB_BACKUP_FILE"

# Uploaded files backup
UPLOADS_BACKUP_FILE="$BACKUP_DIR/uploads_backup_$DATE.tar.gz"
echo "开始上传文件备份..."
tar -czf "$UPLOADS_BACKUP_FILE" -C /var/www/flaskapp/uploads .

# Log files backup
LOGS_BACKUP_FILE="$BACKUP_DIR/logs_backup_$DATE.tar.gz"
tar -czf "$LOGS_BACKUP_FILE" -C /var/log/gunicorn .

# Upload to cloud storage (optional; only when AWS credentials are set)
if [ -n "${AWS_ACCESS_KEY_ID:-}" ]; then
    echo "上传备份到S3..."
    aws s3 cp "$DB_BACKUP_FILE.gz" s3://your-bucket/backups/db/
    aws s3 cp "$UPLOADS_BACKUP_FILE" s3://your-bucket/backups/uploads/
fi

# Remove backups older than the retention period
find "$BACKUP_DIR" -name "*.gz" -type f -mtime +"$RETENTION_DAYS" -delete
echo "备份完成: $DATE"

# Add to crontab for automatic backups
# Run every day at 02:00
# 0 2 * * * /path/to/backup.sh >> /var/log/backup.log 2>&1
#!/bin/bash
# security_hardening.sh - server security hardening (run as root)
#
# Set SSH_ALLOWED_USER to the administrator account that must keep SSH
# access; it defaults to the user who invoked sudo.
set -euo pipefail

SSH_ALLOWED_USER="${SSH_ALLOWED_USER:-${SUDO_USER:-}}"

# Update the system
apt update && apt upgrade -y

# Install basic security tools
apt install -y fail2ban ufw unattended-upgrades

# Configure the firewall
ufw default deny incoming
ufw default allow outgoing
ufw allow ssh
ufw allow http
ufw allow https
# --force skips the interactive confirmation prompt
ufw --force enable

# Harden SSH. The patterns match the directive whether or not it is
# commented out and whatever its current value is.
sed -i -E 's/^#?\s*PasswordAuthentication\s+.*/PasswordAuthentication no/' /etc/ssh/sshd_config
sed -i -E 's/^#?\s*PermitRootLogin\s+.*/PermitRootLogin no/' /etc/ssh/sshd_config
# Restrict SSH to the administrator account. Restricting it to the
# "flaskapp" service account would lock every administrator out.
if [ -n "$SSH_ALLOWED_USER" ] && ! grep -q '^AllowUsers' /etc/ssh/sshd_config; then
    echo "AllowUsers $SSH_ALLOWED_USER" >> /etc/ssh/sshd_config
fi
# Validate the configuration before restarting, so a typo cannot take sshd down
sshd -t
systemctl restart sshd

# Configure fail2ban
cat > /etc/fail2ban/jail.local << EOF
[sshd]
enabled = true
port = ssh
filter = sshd
logpath = /var/log/auth.log
maxretry = 3
bantime = 3600

[nginx-http-auth]
enabled = true
filter = nginx-http-auth
port = http,https
logpath = /var/log/nginx/error.log
maxretry = 3
bantime = 3600
EOF
systemctl enable fail2ban
systemctl start fail2ban

# Automatic security updates
cat > /etc/apt/apt.conf.d/50unattended-upgrades << EOF
Unattended-Upgrade::Allowed-Origins {
    "\${distro_id}:\${distro_codename}-security";
    "\${distro_id}:\${distro_codename}-updates";
};
Unattended-Upgrade::AutoFixInterruptedDpkg "true";
Unattended-Upgrade::MinimalSteps "true";
Unattended-Upgrade::Remove-Unused-Dependencies "true";
Unattended-Upgrade::Automatic-Reboot "true";
Unattended-Upgrade::Automatic-Reboot-Time "02:00";
EOF

echo "安全加固完成!"
# .github/workflows/ci-cd.yml
name: CI/CD Pipeline

on:
  push:
    branches: [ main, develop ]
  pull_request:
    branches: [ main ]

env:
  REGISTRY: ghcr.io
  IMAGE_NAME: ${{ github.repository }}

jobs:
  # Static checks
  lint:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install flake8 black isort mypy
      - name: Lint with flake8
        run: |
          flake8 app tests --count --max-complexity=10 --statistics
      - name: Check formatting with black
        run: |
          black --check app tests
      - name: Check imports with isort
        run: |
          isort --check-only app tests
      - name: Type check with mypy
        run: |
          mypy app

  # Tests
  test:
    runs-on: ubuntu-latest
    services:
      postgres:
        image: postgres:15
        env:
          POSTGRES_PASSWORD: postgres
          # Create the database that DATABASE_URL below points at
          POSTGRES_DB: test_db
        options: >-
          --health-cmd pg_isready
          --health-interval 10s
          --health-timeout 5s
          --health-retries 5
        ports:
          - 5432:5432
      redis:
        image: redis:7-alpine
        options: >-
          --health-cmd "redis-cli ping"
          --health-interval 10s
          --health-timeout 5s
          --health-retries 5
        ports:
          - 6379:6379
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements/development.txt
      - name: Run tests with pytest
        env:
          DATABASE_URL: postgresql://postgres:postgres@localhost:5432/test_db
          REDIS_URL: redis://localhost:6379/0
          SECRET_KEY: test-secret-key
        run: |
          pytest --cov=app --cov-report=xml --cov-report=html
      - name: Upload coverage to Codecov
        uses: codecov/codecov-action@v3
        with:
          file: ./coverage.xml
          flags: unittests
      - name: Archive test results
        # v3 of this action has been retired; v4 skips dot-files such as
        # .coverage unless include-hidden-files is set.
        uses: actions/upload-artifact@v4
        if: always()
        with:
          name: test-results
          include-hidden-files: true
          path: |
            htmlcov/
            .coverage

  # Build and push the Docker image
  build:
    needs: [lint, test]
    runs-on: ubuntu-latest
    if: github.ref == 'refs/heads/main'
    # GITHUB_TOKEN needs package write access to push to ghcr.io
    permissions:
      contents: read
      packages: write
    steps:
      - uses: actions/checkout@v4
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v2
      - name: Log in to Container Registry
        uses: docker/login-action@v2
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}
      - name: Extract metadata
        id: meta
        uses: docker/metadata-action@v4
        with:
          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
          # "type=sha" yields a short "sha-xxxxxxx" tag; the raw full-SHA
          # tag is the one the deploy job references.
          tags: |
            type=ref,event=branch
            type=sha
            type=raw,value=${{ github.sha }}
            type=raw,value=latest
      - name: Build and push
        uses: docker/build-push-action@v4
        with:
          context: .
          push: true
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
          cache-from: type=gha
          cache-to: type=gha,mode=max

  # Deploy to production
  deploy:
    needs: build
    runs-on: ubuntu-latest
    if: github.ref == 'refs/heads/main'
    steps:
      - uses: actions/checkout@v4
      # Provide cluster credentials; without a kube context the kubectl
      # commands below have no cluster to talk to.
      - name: Set Kubernetes context
        uses: azure/k8s-set-context@v3
        with:
          method: kubeconfig
          kubeconfig: ${{ secrets.KUBE_CONFIG }}
      - name: Deploy to Kubernetes
        uses: azure/k8s-deploy@v4
        with:
          namespace: flaskapp
          manifests: |
            k8s/namespace.yaml
            k8s/configmap.yaml
            k8s/secret.yaml
            k8s/deployment.yaml
            k8s/service.yaml
            k8s/ingress.yaml
            k8s/hpa.yaml
          images: |
            ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ github.sha }}
      - name: Verify deployment
        run: |
          kubectl rollout status deployment/flaskapp-deployment -n flaskapp --timeout=300s
      - name: Run smoke tests
        run: |
          # Smoke test against the public health endpoint
          curl -f https://yourdomain.com/health || exit 1
      - name: Notify on success
        if: success()
        uses: 8398a7/action-slack@v3
        with:
          channel: '#deployments'
          status: ${{ job.status }}
          author_name: GitHub Actions
        env:
          SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }}
      - name: Notify on failure
        if: failure()
        uses: 8398a7/action-slack@v3
        with:
          channel: '#alerts'
          status: ${{ job.status }}
          author_name: GitHub Actions
        env:
          SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }}
根据项目需求选择部署方案:
| 项目类型 | 推荐方案 | 理由 |
|---|---|---|
| 个人项目/原型 | Heroku / Railway | 简单快速,无需运维 |
| 中小型网站 | VPS + Docker | 成本可控,灵活部署 |
| 企业级应用 | Kubernetes | 高可用,自动扩展 |
| 流量波动大 | Serverless | 按需付费,自动扩展 |
| 全球用户 | 多区域部署 + CDN | 低延迟,高可用 |
决策树:
生产环境性能优化策略:
# performance_optimization.py
from flask import Flask
import gunicorn
app = Flask(__name__)
# === 1. Database optimization ===
# Connection-pool settings stored under SQLALCHEMY_ENGINE_OPTIONS.
app.config['SQLALCHEMY_ENGINE_OPTIONS'] = {
# Number of connections kept open in the pool.
'pool_size': 20,
# Recycle connections older than 3600 seconds.
'pool_recycle': 3600,
# Test each connection for liveness when it is checked out.
'pool_pre_ping': True,
# Extra connections allowed on top of pool_size under load.
'max_overflow': 10,
}
# 启用查询缓存
from sqlalchemy import event
from sqlalchemy.orm import Session
@event.listens_for(Session, 'do_orm_execute')
def receive_do_orm_execute(orm_execute_state):
    """Attach a ``cache_key`` execution option to every ORM SELECT.

    Statements that are not SELECTs are left untouched.

    NOTE(review): every SELECT receives the same literal key and nothing
    visible in this listing reads the option back -- confirm that a consumer
    of ``cache_key`` exists before relying on this as a query cache.
    """
    if not orm_execute_state.is_select:
        return
    tagged_statement = orm_execute_state.statement.execution_options(
        cache_key="my_cache_key"
    )
    orm_execute_state.statement = tagged_statement
# === 2. 缓存优化 ===
from flask_caching import Cache
# Redis-backed cache bound to the application.
# NOTE(review): 'REDIS_URL' is read with subscript access, so a missing key
# raises KeyError while this module is being imported; nothing visible in this
# listing loads it into app.config -- confirm the configuration is loaded first.
cache = Cache(app, config={
'CACHE_TYPE': 'redis',
'CACHE_REDIS_URL': app.config['REDIS_URL'],
# Default lifetime of a cached entry, in seconds.
'CACHE_DEFAULT_TIMEOUT': 300,
# Prefix prepended to every cache key.
'CACHE_KEY_PREFIX': 'flask_cache:',
})
# View-level caching
@app.route('/expensive-operation')
@cache.cached(timeout=300)
def expensive_operation():
    """Return the result of ``expensive_computation()``.

    The view is wrapped in ``cache.cached`` with a 300-second timeout.
    """
    # The slow work happens here.
    computed = expensive_computation()
    return computed
# === 3. 异步任务 ===
from celery import Celery
# Celery application named after the Flask app; broker and result backend come
# from the Flask configuration.
# NOTE(review): both keys are read with subscript access, so a missing key
# raises KeyError at import time; nothing visible in this listing sets them --
# confirm the configuration is loaded before this module runs.
celery = Celery(
app.name,
broker=app.config['CELERY_BROKER_URL'],
backend=app.config['CELERY_RESULT_BACKEND']
)
@celery.task
def send_email_async(to, subject, body):
    """Celery task that sends one e-mail by delegating to ``send_email``.

    Args:
        to: Recipient, forwarded unchanged.
        subject: Subject line, forwarded unchanged.
        body: Message body, forwarded unchanged.
    """
    # The arguments are forwarded positionally, in the order received.
    message_args = (to, subject, body)
    send_email(*message_args)
# === 4. Static-file optimization ===
# Serve static assets through a CDN
@app.context_processor
def inject_cdn():
    """Expose ``CDN_URL`` to templates; empty string when it is not configured."""
    cdn_url = app.config.get('CDN_URL', '')
    return dict(CDN_URL=cdn_url)
# Usage inside a template:
# <img src="{{ CDN_URL }}/static/images/logo.png">
# === 5. Gunicorn配置优化 ===
# gunicorn_config.py
import multiprocessing
# Scale the number of workers with the CPU core count: 2 * cores + 1.
workers = multiprocessing.cpu_count() * 2 + 1
# Use an asynchronous (gevent) worker class for higher concurrency.
worker_class = 'gevent'
worker_connections = 1000
# Enable keep-alive.
keepalive = 2
# === 6. Slow-request monitoring ===
@app.before_request
def before_request():
    """Record the request start time on ``flask.g``.

    The value is a ``time.time()`` timestamp stored as ``g.start_time`` so an
    after-request hook can compute how long the request took.
    """
    import time

    # Imported here because this listing's module-level import only brings in
    # ``Flask``; without it ``g`` is an undefined name and every request fails.
    from flask import g

    g.start_time = time.time()
@app.after_request
def after_request(response):
    """Log a warning for requests that took longer than one second.

    Args:
        response: The outgoing response object.

    Returns:
        The same response, unmodified.
    """
    import time

    # Imported here because this listing's module-level import only brings in
    # ``Flask``; without it ``g`` and ``request`` are undefined names.
    from flask import g, request

    started = getattr(g, 'start_time', None)
    if started is None:
        # No start time was recorded for this request; nothing to measure.
        return response

    duration = time.time() - started
    if duration > 1.0:  # slower than one second
        app.logger.warning(f"慢请求: {request.path} - {duration:.2f}s")
    return response
# === 7. Enable response compression ===
from flask_compress import Compress
# Register the Flask-Compress extension on the application.
Compress(app)
# === 8. 数据库索引优化 ===
# 确保常用查询字段有索引
# 定期分析查询性能
# 使用EXPLAIN ANALYZE分析查询计划
# === 9. Monitoring and tuning ===
@app.route('/performance')
@admin_required
def performance_metrics():
    """Return a JSON snapshot of host and process resource usage.

    Returns:
        A JSON response with the keys ``memory``, ``cpu_percent``,
        ``disk_usage``, ``open_files`` and ``process_memory``.
    """
    import psutil
    import resource  # Unix-only standard-library module

    # Imported here because this listing's module-level import only brings in
    # ``Flask``; without it ``jsonify`` is an undefined name.
    from flask import jsonify

    metrics = {
        'memory': psutil.virtual_memory()._asdict(),
        # interval=1 makes this call block for one second while sampling.
        'cpu_percent': psutil.cpu_percent(interval=1),
        'disk_usage': psutil.disk_usage('/')._asdict(),
        # Despite the key name this is the RLIMIT_NOFILE (soft, hard) limit
        # pair, not the number of files currently open.
        'open_files': resource.getrlimit(resource.RLIMIT_NOFILE),
        'process_memory': psutil.Process().memory_info()._asdict(),
    }
    return jsonify(metrics)
零停机部署策略:
# zero_downtime_deployment.py
from flask import Flask, request
import time
import signal
import sys
import threading
app = Flask(__name__)
class GracefulShutdown:
    """Track in-flight requests so the process can drain before exiting."""

    def __init__(self):
        # Set to True once a shutdown has been requested.
        self.should_exit = False
        # Guards every update of ``active_requests``.
        self.lock = threading.Lock()
        # Number of requests currently being processed.
        self.active_requests = 0

    def increment(self):
        """Count one more request as in flight."""
        with self.lock:
            self.active_requests += 1

    def decrement(self):
        """Count one request as finished; the counter never drops below zero.

        An unmatched ``decrement()`` would otherwise push the counter negative,
        after which ``should_stop()`` could never report True.
        """
        with self.lock:
            if self.active_requests > 0:
                self.active_requests -= 1

    def should_stop(self):
        """Return True when shutdown was requested and no request is in flight."""
        with self.lock:
            return self.should_exit and self.active_requests <= 0
shutdown_manager = GracefulShutdown()
# Request-counting middleware
@app.before_request
def before_request():
    """Reject new requests while draining; otherwise count the request.

    Returns:
        A ``(body, 503)`` pair when a shutdown is in progress, else ``None`` so
        normal request handling continues.
    """
    # ``g`` is not part of this listing's module-level Flask import.
    from flask import g

    if shutdown_manager.should_exit:
        return {'error': '服务器正在重启,请稍后重试'}, 503
    shutdown_manager.increment()
    # Remember that this request was counted so that only counted requests
    # are subtracted again in after_request.
    g.shutdown_counted = True


@app.after_request
def after_request(response):
    """Release the in-flight slot taken by ``before_request``.

    After-request hooks also run for responses produced by a before-request
    hook, so the 503 rejection above passes through here as well; it was never
    counted and must not be subtracted.

    Args:
        response: The outgoing response object.

    Returns:
        The same response, unmodified.
    """
    from flask import g

    if g.pop('shutdown_counted', False):
        shutdown_manager.decrement()
    return response
# Graceful-shutdown signal handler
def handle_shutdown(signum, frame):
    """Flag the shutdown, wait for in-flight requests, then exit with status 0.

    Args:
        signum: Signal number (unused).
        frame: Interrupted stack frame (unused).

    NOTE(review): the wait loop has no upper bound, so the handler blocks for
    as long as ``shutdown_manager.should_stop()`` keeps returning False.
    """
    print("收到关闭信号,开始优雅关闭...")
    shutdown_manager.should_exit = True

    # Poll once per second until no request is left in flight.
    while True:
        if shutdown_manager.should_stop():
            break
        print(f"等待 {shutdown_manager.active_requests} 个活跃请求完成...")
        time.sleep(1)

    print("所有请求处理完成,安全退出")
    sys.exit(0)
# Register handle_shutdown for SIGTERM and SIGINT.
# NOTE(review): this runs at import time and replaces whatever handlers were
# installed for these signals before the module was imported -- confirm that
# is intended when the app runs under a process manager.
signal.signal(signal.SIGTERM, handle_shutdown)
signal.signal(signal.SIGINT, handle_shutdown)
# Health-check endpoint (polled by the load balancer)
@app.route('/health')
def health_check():
    """Report whether this instance should receive traffic.

    Returns:
        ``({'status': 'draining'}, 503)`` while a shutdown is in progress;
        ``({'status': 'unhealthy', 'checks': ...}, 503)`` when a dependency
        check fails; ``({'status': 'healthy', 'checks': ...}, 200)`` otherwise.
    """
    if shutdown_manager.should_exit:
        return {'status': 'draining'}, 503

    checks = {
        'database': check_database(),
        'redis': check_redis(),
    }
    # The check results used to be reported but ignored, so a broken
    # dependency still produced "healthy" / 200.
    # NOTE(review): assumes check_database() / check_redis() return a truthy
    # value on success and a falsy one on failure -- confirm.
    if not all(checks.values()):
        return {'status': 'unhealthy', 'checks': checks}, 503
    return {'status': 'healthy', 'checks': checks}, 200
# === Docker健康检查配置 ===
# Dockerfile:
# HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
# CMD curl -f http://localhost:5000/health || exit 1
# === Kubernetes就绪探针 ===
# readinessProbe:
# httpGet:
# path: /health
# port: 5000
# initialDelaySeconds: 5
# periodSeconds: 5
# failureThreshold: 3
# === 蓝绿部署策略 ===
# 1. 部署新版本到新环境(绿色)
# 2. 运行测试验证新版本
# 3. 切换流量到新版本
# 4. 监控新版本运行情况
# 5. 如果发现问题,快速回滚到旧版本(蓝色)
# === 金丝雀发布 ===
# 1. 部署新版本到少数实例
# 2. 将少量用户流量导向新版本
# 3. 监控新版本性能和错误率
# 4. 如果一切正常,逐步增加流量
# 5. 最终完全切换到新版本
# === Database migration strategy ===
def perform_database_migration():
    """Placeholder for a zero-downtime database migration.

    The function body is intentionally empty. The intended rollout is:

    1. Make backward-compatible schema changes.
    2. Deploy an application version that supports both old and new schema.
    3. Migrate the data (using background tasks).
    4. Verify data consistency.
    5. Deploy a version that uses only the new schema.
    6. Clean up the old schema.
    """
    return None
# === Zero-downtime with Nginx ===
# Nginx configuration:
# Proxy every request to the "backend" upstream group defined below.
location / {
proxy_pass http://backend;
# Conditions under which the request is passed on to the next upstream server.
proxy_next_upstream error timeout invalid_header http_500 http_502 http_503 http_504;
# Give up connecting to an upstream server after 2 seconds.
proxy_connect_timeout 2s;
# Give up waiting for upstream response data after 30 seconds.
proxy_read_timeout 30s;
}
# Upstream group; max_fails / fail_timeout control when a server is treated
# as unavailable.
upstream backend {
server backend1:5000 max_fails=3 fail_timeout=30s;
server backend2:5000 max_fails=3 fail_timeout=30s;
# Marked as "backup".
server backend3:5000 max_fails=3 fail_timeout=30s backup;
}
生产环境监控体系:
# monitoring_system.py
import logging
from datetime import datetime
from dataclasses import dataclass
from typing import Dict, Any
from flask import Flask, request
import statsd
from prometheus_client import Counter, Histogram, generate_latest
import sentry_sdk
app = Flask(__name__)
# === 1. Application metrics ===
# Prometheus metrics
# Counter labelled by HTTP method, endpoint and response status.
REQUEST_COUNT = Counter(
'flaskapp_requests_total',
'Application Request Count',
['method', 'endpoint', 'http_status']
)
# Histogram labelled by HTTP method and endpoint.
REQUEST_LATENCY = Histogram(
'flaskapp_request_latency_seconds',
'Request latency',
['method', 'endpoint']
)
# Counter labelled by error type.
ERROR_COUNT = Counter(
'flaskapp_errors_total',
'Application Error Count',
['error_type']
)
# Unlabelled histogram for database query durations.
DATABASE_QUERY_DURATION = Histogram(
'flaskapp_db_query_duration_seconds',
'Database query duration'
)
# === 2. Request-monitoring middleware ===
@app.before_request
def before_request():
    """Stamp the request with its start time and a fresh request ID.

    Sets ``request.start_time`` (a ``datetime``) and ``request.request_id``
    (a UUID4 string) for use later in the request.
    """
    # Imported here because ``uuid`` is not among this listing's module-level
    # imports; without it every request fails with NameError.
    import uuid

    request.start_time = datetime.now()
    request.request_id = str(uuid.uuid4())
@app.after_request
def after_request(response):
    """Record request metrics and tag the response with the request ID.

    Args:
        response: The outgoing response object.

    Returns:
        The same response, with an ``X-Request-ID`` header added.
    """
    method = request.method
    endpoint = request.endpoint

    # Request counter, labelled by method, endpoint and status code.
    counter = REQUEST_COUNT.labels(
        method=method,
        endpoint=endpoint,
        http_status=response.status_code,
    )
    counter.inc()

    # Latency histogram, in seconds since before_request stamped the start.
    elapsed = datetime.now() - request.start_time
    latency = elapsed.total_seconds()
    REQUEST_LATENCY.labels(method=method, endpoint=endpoint).observe(latency)

    # Echo the request ID back to the client.
    response.headers['X-Request-ID'] = request.request_id

    # Warn about requests slower than one second.
    if latency > 1.0:
        app.logger.warning(f"慢请求: {request.path} - {latency:.2f}s")
    return response
@app.errorhandler(Exception)
def handle_exception(error):
    """Count, report and mask unexpected exceptions as a generic 500.

    HTTP errors (404, 405, ...) are returned unchanged so they keep their own
    status code instead of being rewritten to 500.

    Args:
        error: The exception raised while handling the request.

    Returns:
        ``error`` itself for ``HTTPException`` instances, otherwise a
        ``(body, 500)`` pair carrying the request ID when one was assigned.
    """
    from werkzeug.exceptions import HTTPException

    # A handler registered for Exception also receives HTTPException
    # subclasses; pass those through as regular HTTP responses.
    if isinstance(error, HTTPException):
        return error

    # Record the error metric.
    ERROR_COUNT.labels(error_type=type(error).__name__).inc()
    # Report to Sentry.
    sentry_sdk.capture_exception(error)
    # The ID is absent when the failure happened before it was assigned.
    request_id = getattr(request, 'request_id', None)
    return {'error': '服务器内部错误', 'request_id': request_id}, 500
# === 3. 数据库监控 ===
from sqlalchemy import event
from sqlalchemy.engine import Engine
@event.listens_for(Engine, "before_cursor_execute")
def before_cursor_execute(conn, cursor, statement, parameters, context, executemany):
    """Push the current time onto the connection's list of query start times."""
    start_times = conn.info.setdefault('query_start_time', [])
    start_times.append(datetime.now())
@event.listens_for(Engine, "after_cursor_execute")
def after_cursor_execute(conn, cursor, statement, parameters, context, executemany):
    """Observe the query duration and warn about queries slower than 0.5 s.

    The start time is the most recent entry pushed onto
    ``conn.info['query_start_time']``.
    """
    finished = datetime.now()
    started = conn.info['query_start_time'].pop(-1)
    total = (finished - started).total_seconds()
    DATABASE_QUERY_DURATION.observe(total)
    # Log slow queries; only the first 200 characters of the SQL are included.
    if total > 0.5:
        app.logger.warning(f"慢查询: {statement[:200]} - {total:.2f}s")
# === 4. Business metrics ===
# Unlabelled counter for user registrations.
USER_REGISTRATION_COUNT = Counter(
'flaskapp_user_registrations_total',
'User registration count'
)
# Order counter, labelled by status.
ORDER_COUNT = Counter(
'flaskapp_orders_total',
'Order count',
['status']
)
# API call counter, labelled by API name and status.
API_CALL_COUNT = Counter(
'flaskapp_api_calls_total',
'API call count',
['api_name', 'status']
)
# === 5. 系统监控 ===
import psutil
from threading import Thread
import time
class SystemMonitor(Thread):
    """Daemon thread that periodically samples CPU, memory and disk usage."""

    def __init__(self, app, interval=60):
        """Create the monitor.

        Args:
            app: Object whose ``logger`` receives the sampled values.
            interval: Seconds to sleep between samples; defaults to 60, the
                period that used to be hard-coded.
        """
        super().__init__(daemon=True)
        self.app = app
        self.interval = interval

    def run(self):
        """Sample, log and forward system metrics forever.

        Any exception raised while sampling is logged and the loop continues
        after the usual pause.
        """
        while True:
            try:
                # CPU usage; interval=1 blocks for one second while sampling.
                cpu_percent = psutil.cpu_percent(interval=1)
                # Memory usage.
                memory = psutil.virtual_memory()
                # Disk usage of the root filesystem.
                disk = psutil.disk_usage('/')
                # Write the sample to the application log.
                self.app.logger.info(
                    f"系统指标: CPU={cpu_percent}%, "
                    f"内存={memory.percent}%, "
                    f"磁盘={disk.percent}%"
                )
                # Forward the sample to the monitoring backend.
                self.send_to_monitoring({
                    'cpu_percent': cpu_percent,
                    'memory_percent': memory.percent,
                    'disk_percent': disk.percent,
                })
            except Exception as e:
                self.app.logger.error(f"系统监控错误: {e}")
            # One pause per iteration, whether sampling succeeded or failed.
            time.sleep(self.interval)

    def send_to_monitoring(self, metrics):
        """Forward metrics to a monitoring backend (not implemented yet).

        Args:
            metrics: Mapping of metric name to sampled value.
        """
        # Integration point for StatsD, Datadog, New Relic, etc.
        pass
# Start the system monitor.
# The thread is created and started as a side effect of importing this module.
monitor = SystemMonitor(app)
monitor.start()
# === 6. 日志聚合 ===
import structlog
def setup_structured_logging(app):
    """Configure structlog and install a structlog logger as ``app.logger``.

    Args:
        app: Application object whose ``logger`` attribute is replaced.
    """
    # Processors run in list order; the last one renders the event as JSON.
    processor_chain = [
        structlog.stdlib.filter_by_level,
        structlog.stdlib.add_logger_name,
        structlog.stdlib.add_log_level,
        structlog.stdlib.PositionalArgumentsFormatter(),
        structlog.processors.TimeStamper(fmt="iso"),
        structlog.processors.StackInfoRenderer(),
        structlog.processors.format_exc_info,
        structlog.processors.UnicodeDecoder(),
        structlog.processors.JSONRenderer(),
    ]
    structlog.configure(
        processors=processor_chain,
        context_class=dict,
        logger_factory=structlog.stdlib.LoggerFactory(),
        wrapper_class=structlog.stdlib.BoundLogger,
        cache_logger_on_first_use=True,
    )
    app.logger = structlog.get_logger()
# === 7. Alert rules ===
@dataclass
class AlertRule:
"""A single alert rule."""
# Rule name; reported in the alert record built by AlertManager.check_alerts.
name: str
# Called with the metrics mapping; a truthy result fires the alert.
condition: callable
severity: str # critical, warning, info
# Channel names for notifications, e.g. "slack" or "email".
notification_channels: list
class AlertManager:
    """Keeps a list of alert rules and evaluates them against metrics."""

    def __init__(self):
        # Rules are evaluated in the order they were added.
        self.rules = []

    def add_rule(self, rule: AlertRule):
        """Register one more rule."""
        self.rules.append(rule)

    def check_alerts(self, metrics: Dict[str, Any]):
        """Evaluate every rule against ``metrics``.

        A notification is sent for each rule whose condition is truthy.

        Args:
            metrics: Mapping of metric name to value, passed to each condition.

        Returns:
            One dict per fired rule with the keys ``rule``, ``severity``,
            ``timestamp`` and ``metrics``.
        """
        fired = []
        for rule in self.rules:
            if not rule.condition(metrics):
                continue
            record = {
                'rule': rule.name,
                'severity': rule.severity,
                'timestamp': datetime.now().isoformat(),
                'metrics': metrics,
            }
            fired.append(record)
            # Send the notification for this rule.
            self.send_notification(rule, metrics)
        return fired

    def send_notification(self, rule: AlertRule, metrics: Dict[str, Any]):
        """Deliver an alert notification (not implemented yet).

        Args:
            rule: The rule that fired.
            metrics: The metrics that triggered it.
        """
        # Planned channels: Slack, e-mail, SMS, phone call.
        pass
# Configure the alert rules.
alert_manager = AlertManager()
# Warning when CPU usage exceeds 80 %; a missing metric counts as 0.
alert_manager.add_rule(AlertRule(
name="高CPU使用率",
condition=lambda m: m.get('cpu_percent', 0) > 80,
severity="warning",
notification_channels=["slack", "email"]
))
# Critical when memory usage exceeds 90 %; a missing metric counts as 0.
alert_manager.add_rule(AlertRule(
name="高内存使用率",
condition=lambda m: m.get('memory_percent', 0) > 90,
severity="critical",
notification_channels=["slack", "email", "sms"]
))
# === 8. Monitoring endpoints ===
@app.route('/metrics')
def metrics():
    """Prometheus scrape endpoint.

    Returns:
        A ``(body, status, headers)`` tuple: the rendered metrics, 200, and
        the Prometheus exposition content type.
    """
    from prometheus_client import CONTENT_TYPE_LATEST

    # Returning the bare payload would be served with Flask's default content
    # type; declare the exposition format explicitly instead.
    return generate_latest(), 200, {'Content-Type': CONTENT_TYPE_LATEST}
@app.route('/health')
def health():
    """Health-check endpoint: always answers ``{'status': 'healthy'}`` with 200."""
    payload = {'status': 'healthy'}
    return payload, 200
@app.route('/status')
@admin_required
def status():
    """Return a JSON summary of application, system and request state.

    Returns:
        A JSON response with the sections ``application``, ``system`` and
        ``requests``.

    NOTE(review): ``shutdown_manager`` is not defined anywhere in this
    listing's module -- confirm it is imported from the module that owns it.
    """
    import psutil

    # Imported here because ``jsonify`` is not among this listing's
    # module-level imports.
    from flask import jsonify

    # Named ``payload`` so the local does not shadow this view function.
    payload = {
        'application': {
            # Seconds since the current process was created.
            'uptime': time.time() - psutil.Process().create_time(),
            'version': app.config.get('VERSION', '1.0.0'),
            'environment': app.config.get('FLASK_ENV', 'production'),
        },
        'system': {
            # interval=1 makes this call block for one second while sampling.
            'cpu_percent': psutil.cpu_percent(interval=1),
            'memory_percent': psutil.virtual_memory().percent,
            'disk_percent': psutil.disk_usage('/').percent,
        },
        'requests': {
            'active': shutdown_manager.active_requests,
        },
    }
    return jsonify(payload)