Flask应用部署

部署准备:将应用从开发环境迁移到生产环境需要仔细规划和配置。本章将指导你完成整个过程。

为什么需要专业部署?

开发环境与生产环境的差异:

并发用户

开发:单个用户
生产:成百上千用户

安全要求

开发:宽松
生产:严格安全策略

性能需求

开发:响应即可
生产:高速响应,高可用

部署流程概览

代码准备
优化生产代码
服务器配置
设置生产服务器
容器化
Docker封装
部署上线
发布到云平台
监控维护
持续监控优化

部署平台选择

平台类型 代表服务 优点 缺点 适用场景
传统VPS DigitalOcean, Linode 完全控制,成本低 需要手动配置 中小型项目,有运维能力
PaaS平台 Heroku, Railway 简单易用,自动扩展 成本较高,控制有限 快速原型,初创公司
容器平台 Docker, Kubernetes 高度可移植,弹性伸缩 学习曲线陡峭 微服务,大规模应用
云服务商 AWS, GCP, Azure 服务全面,全球覆盖 复杂,成本不可预测 企业级应用,全球业务
Serverless AWS Lambda, Vercel 按需付费,自动扩展 冷启动延迟,限制多 事件驱动,流量波动大

部署前准备

1. 生产环境代码优化

重要:禁用调试模式!

在生产环境中必须禁用调试模式,否则会暴露敏感信息和安全漏洞。

# app/__init__.py - 确保生产环境配置正确
import os
from flask import Flask

def create_app():
    """Build and configure the Flask application.

    The configuration class is selected by the ``FLASK_ENV`` environment
    variable (``development`` when unset). After loading it, debug/testing
    flags are forced off, cookie hardening is applied, JSON output is tuned,
    and extensions and blueprints are registered.

    Returns:
        The configured ``Flask`` instance.

    Raises:
        ValueError: In production, when ``SECRET_KEY`` or ``DATABASE_URL``
            is missing or empty in the loaded configuration.
    """
    app = Flask(__name__)

    # Select the configuration class from the environment.
    env = os.environ.get('FLASK_ENV', 'development')

    if env == 'production':
        app.config.from_object('config.ProductionConfig')
        # Fail fast: refuse to start production without the critical settings.
        required_vars = ['SECRET_KEY', 'DATABASE_URL']
        for var in required_vars:
            if not app.config.get(var):
                raise ValueError(f"生产环境必须设置 {var}")
    else:
        app.config.from_object('config.DevelopmentConfig')

    # Force debug/testing off regardless of what the config class set.
    app.debug = False
    app.config['DEBUG'] = False
    app.config['TESTING'] = False

    # Cookie hardening.
    app.config['SESSION_COOKIE_SECURE'] = True  # send cookies over HTTPS only
    app.config['REMEMBER_COOKIE_SECURE'] = True
    app.config['SESSION_COOKIE_HTTPONLY'] = True
    app.config['SESSION_COOKIE_SAMESITE'] = 'Lax'

    # JSON output tuning. Flask 2.3 removed the JSON_SORT_KEYS and
    # JSONIFY_PRETTYPRINT_REGULAR config keys; the JSON provider attributes
    # below are their replacements (available since Flask 2.2).
    app.json.sort_keys = False
    app.json.compact = True

    # Wire up extensions and routes once configuration is final.
    configure_extensions(app)
    register_blueprints(app)

    return app

# Module-level application instance.
app = create_app()

# Register the /debug route whenever FLASK_ENV is anything other than
# 'production' (this includes the case where the variable is unset).
if os.environ.get('FLASK_ENV') != 'production':
    def debug_info():
        return "调试信息"

    app.add_url_rule('/debug', view_func=debug_info)

2. 依赖管理

# requirements.txt - production dependencies (exact pins)
# Core framework
Flask==2.3.3
Werkzeug==2.3.7
Jinja2==3.1.2
itsdangerous==2.1.2
click==8.1.3

# Database
SQLAlchemy==2.0.19
Flask-SQLAlchemy==3.0.5
psycopg2-binary==2.9.7  # PostgreSQL driver
redis==4.6.0

# WSGI server
gunicorn==21.2.0
gevent==23.9.1

# Security
Flask-Login==0.6.2
Flask-WTF==1.1.1
Flask-CORS==4.0.0
bcrypt==4.0.1
cryptography==41.0.5

# Utilities
python-dotenv==1.0.0
requests==2.31.0
celery==5.3.4
Flask-Mail==0.9.1

# Monitoring and logging
blinker==1.7.0
structlog==23.1.0
sentry-sdk[flask]==1.35.0

# For finer-grained control, list only top-level packages in requirements.in
# and let pip-tools generate the pinned file.
# Contents of requirements.in:
# Flask
# SQLAlchemy
# gunicorn
# redis
#
# Then run: pip-compile --output-file=requirements.txt requirements.in
# (pip-compile writes the output file itself; do not redirect stdout into it.)
# Create layered requirement files:
# requirements/
# ├── base.txt        # shared by every environment
# ├── production.txt  # production-only additions
# └── development.txt # development-only additions

# The redirections below fail if the directory does not exist yet.
mkdir -p requirements

# base.txt
# The heredoc delimiters are quoted so the shell never expands the contents.
cat > requirements/base.txt << 'EOF'
Flask>=2.3.0,<3.0.0
Werkzeug>=2.3.0,<3.0.0
Jinja2>=3.1.0,<4.0.0
itsdangerous>=2.1.0,<3.0.0
click>=8.0.0,<9.0.0
python-dotenv>=1.0.0,<2.0.0
EOF

# production.txt
cat > requirements/production.txt << 'EOF'
-r base.txt
gunicorn>=20.0.0,<22.0.0
gevent>=22.0.0,<24.0.0
psycopg2-binary>=2.9.0,<3.0.0
redis>=4.5.0,<5.0.0
sentry-sdk[flask]>=1.30.0,<2.0.0
EOF

# development.txt
cat > requirements/development.txt << 'EOF'
-r base.txt
pytest>=7.0.0,<8.0.0
pytest-cov>=4.0.0,<5.0.0
black>=23.0.0,<24.0.0
flake8>=6.0.0,<7.0.0
Flask-DebugToolbar>=0.13.0,<1.0.0
EOF

# Install commands
pip install -r requirements/production.txt  # production
pip install -r requirements/development.txt # development

3. 环境变量管理

环境变量配置文件示例
推荐 易用
# .env.production - production environment variables (never commit this file)
# NOTE: keep comments on their own lines. systemd's EnvironmentFile (and some
# dotenv loaders) treat a trailing "# ..." as part of the value.
# ============ Application ============
FLASK_ENV=production
FLASK_APP=app:create_app()
SECRET_KEY=your-production-secret-key-change-this
SECURITY_PASSWORD_SALT=your-salt-change-this

# ============ Database ============
DATABASE_URL=postgresql://username:password@localhost:5432/production_db
REDIS_URL=redis://localhost:6379/0

# ============ Mail ============
MAIL_SERVER=smtp.gmail.com
MAIL_PORT=587
MAIL_USE_TLS=true
MAIL_USERNAME=your-email@gmail.com
MAIL_PASSWORD=your-app-password
MAIL_DEFAULT_SENDER=noreply@example.com

# ============ Third-party services ============
STRIPE_SECRET_KEY=sk_live_xxxxxxxxxxxxxx
STRIPE_PUBLIC_KEY=pk_live_xxxxxxxxxxxxxx
GOOGLE_MAPS_API_KEY=AIzaSyxxxxxxxxxxxxxxxxxxxxxx
AWS_ACCESS_KEY_ID=AKIAxxxxxxxxxxxxxx
AWS_SECRET_ACCESS_KEY=xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx

# ============ Monitoring and logging ============
SENTRY_DSN=https://xxxxxxxxxxxxxxxx@sentry.io/xxxxxxx
LOG_LEVEL=WARNING
LOG_FILE=/var/log/flask/app.log

# ============ Performance ============
WORKERS=4
THREADS=2
TIMEOUT=30
MAX_REQUESTS=1000
MAX_REQUESTS_JITTER=50

# ============ Security ============
SESSION_COOKIE_SECURE=true
REMEMBER_COOKIE_SECURE=true
CORS_ORIGINS=https://yourdomain.com

# ============ Application-specific ============
ITEMS_PER_PAGE=20
UPLOAD_FOLDER=/var/www/uploads
# 50 MB, expressed in bytes
MAX_CONTENT_LENGTH=52428800

传统服务器部署

1. 服务器架构

用户

HTTP/HTTPS请求

Nginx

反向代理,静态文件,SSL

Gunicorn

WSGI服务器,进程管理

Flask应用

业务逻辑处理

2. 服务器环境配置

#!/bin/bash
# setup_server.sh - initial provisioning script for an Ubuntu/Debian server.
# Installs Python 3.11, PostgreSQL, Redis and Nginx, creates the database and
# the unprivileged account that runs the application.
#
# Optional environment variable:
#   DB_PASSWORD  password for the flask_user database role
#                (defaults to the placeholder 'secure_password'; change it,
#                and avoid single quotes because it is embedded in SQL).

# Stop at the first failing command instead of continuing half-configured.
set -euo pipefail

DB_PASSWORD="${DB_PASSWORD:-secure_password}"

# Update the system
sudo apt update
sudo apt upgrade -y

# Install base tools
sudo apt install -y curl wget git build-essential software-properties-common

# Install Python 3.11
sudo add-apt-repository ppa:deadsnakes/ppa -y
sudo apt update
sudo apt install -y python3.11 python3.11-venv python3.11-dev

# Do NOT repoint /usr/bin/python3 at python3.11: distribution tools written
# for the system interpreter (apt helpers, ufw, certbot, ...) break when the
# default changes. Call python3.11 explicitly instead, for example:
#   python3.11 -m venv /var/www/flaskapp/venv

# Install PostgreSQL
sudo apt install -y postgresql postgresql-contrib libpq-dev

# Configure PostgreSQL. ON_ERROR_STOP makes psql exit non-zero on SQL errors.
# The role is created first so it can own the database; on PostgreSQL 15+ a
# plain GRANT on the database no longer allows creating tables in "public".
sudo -u postgres psql -v ON_ERROR_STOP=1 << EOF
CREATE USER flask_user WITH PASSWORD '${DB_PASSWORD}';
CREATE DATABASE flask_production OWNER flask_user;
ALTER ROLE flask_user SET client_encoding TO 'utf8';
ALTER ROLE flask_user SET default_transaction_isolation TO 'read committed';
ALTER ROLE flask_user SET timezone TO 'UTC';
GRANT ALL PRIVILEGES ON DATABASE flask_production TO flask_user;
\q
EOF

# Install Redis
sudo apt install -y redis-server
sudo systemctl enable redis-server
sudo systemctl start redis-server

# Install Nginx
sudo apt install -y nginx

# Create the application account. --user-group creates a group with the same
# name (useradd has no bare --group flag; that spelling belongs to adduser).
sudo useradd --system --user-group --shell /bin/bash flaskapp
sudo mkdir -p /var/www/flaskapp
sudo chown -R flaskapp:flaskapp /var/www/flaskapp

# Install system libraries needed by image-processing dependencies
sudo apt install -y libjpeg-dev libpng-dev libfreetype6-dev

echo "服务器初始化完成!"

3. 使用Gunicorn部署

Gunicorn优势

Gunicorn是一个成熟的Python WSGI HTTP服务器,与Flask完美集成,支持多进程和多线程。

# gunicorn_config.py - Gunicorn配置文件
import os
import multiprocessing

# Server socket and workers
bind = "127.0.0.1:8000"  # loopback only; Nginx reverse-proxies to this address
workers = multiprocessing.cpu_count() * 2 + 1  # commonly recommended formula: (2 x cores) + 1
threads = 2  # NOTE(review): per the Gunicorn docs this only affects the gthread worker; it is ignored with gevent
worker_class = "gevent"  # gevent-based asynchronous worker
worker_connections = 1000  # maximum simultaneous clients per worker

# Timeouts
timeout = 30  # seconds before a silent worker is killed and restarted
keepalive = 2  # seconds to wait for the next request on a keep-alive connection (not a connection count)
graceful_timeout = 30  # seconds workers get to finish requests on graceful shutdown

# Logging
accesslog = "/var/log/gunicorn/access.log"
errorlog = "/var/log/gunicorn/error.log"
loglevel = "info"
access_log_format = '%(h)s %(l)s %(u)s %(t)s "%(r)s" %(s)s %(b)s "%(f)s" "%(a)s"'

# Process management
max_requests = 1000  # restart a worker after it has handled this many requests
max_requests_jitter = 50  # random jitter so workers do not all restart at once
preload_app = True  # load the app before forking; NOTE(review): confirm this plays well with gevent monkey-patching

# Request size limits
limit_request_line = 4094  # maximum size of the HTTP request line, in bytes
limit_request_fields = 100  # maximum number of request header fields
limit_request_field_size = 8190  # maximum size of a single header field, in bytes

# Performance tuning
backlog = 2048  # maximum number of pending connections

def when_ready(server):
    """Gunicorn server hook: log a message once the server has started."""
    startup_log = server.log
    startup_log.info("Gunicorn服务器已启动")

def worker_int(worker):
    """Gunicorn server hook: log that a worker received SIGINT or SIGQUIT.

    Gunicorn calls this hook just after a worker exits on SIGINT/SIGQUIT,
    i.e. an interrupt requested from outside, not a crash. The message says
    so explicitly so operators are not misled into chasing a failure.
    """
    worker.log.info("worker收到中断信号(SIGINT/SIGQUIT): %s", worker.pid)

def worker_abort(worker):
    """Gunicorn server hook: log that a worker was aborted.

    Per Gunicorn's server-hook documentation this runs when a worker
    receives SIGABRT, which generally happens when it exceeds ``timeout``.
    """
    worker.log.info("worker被终止: %s", worker.pid)
# Create the systemd unit file
# /etc/systemd/system/flaskapp.service

# Writing under /etc needs root, so the file is written through "sudo tee".
# The heredoc delimiter is quoted: the content is copied literally and needs
# no backslash escaping for "\" or "$MAINPID".
sudo tee /etc/systemd/system/flaskapp.service > /dev/null << 'EOF'
[Unit]
Description=Flask Application with Gunicorn
After=network.target postgresql.service redis-server.service
Requires=postgresql.service redis-server.service

[Service]
User=flaskapp
Group=flaskapp
WorkingDirectory=/var/www/flaskapp
Environment="PATH=/var/www/flaskapp/venv/bin"
Environment="FLASK_ENV=production"
EnvironmentFile=/var/www/flaskapp/.env.production

# Create /var/log/gunicorn owned by the service user before starting
LogsDirectory=gunicorn

ExecStart=/var/www/flaskapp/venv/bin/gunicorn \
          --config /var/www/flaskapp/gunicorn_config.py \
          app:create_app()

# 优雅重启信号
ExecReload=/bin/kill -s HUP $MAINPID
# 优雅停止信号
ExecStop=/bin/kill -s TERM $MAINPID

# 如果进程崩溃,自动重启
Restart=on-failure
# 重启间隔
RestartSec=10

# 安全限制
PrivateTmp=true
ProtectSystem=full
NoNewPrivileges=true
ReadWritePaths=/var/www/flaskapp/uploads /var/log/gunicorn

# 资源限制
LimitNOFILE=65535
LimitNPROC=512

[Install]
WantedBy=multi-user.target
EOF

# Paths listed in ReadWritePaths must exist or the unit fails to start.
sudo install -d -o flaskapp -g flaskapp /var/www/flaskapp/uploads

# Enable and start the service
sudo systemctl daemon-reload
sudo systemctl enable flaskapp
sudo systemctl start flaskapp
sudo systemctl status flaskapp

4. Nginx配置

Nginx配置文件示例
推荐 流行
# /etc/nginx/sites-available/flaskapp
# Nginx site for the Flask application: terminates TLS, serves static files
# directly and reverse-proxies everything else to Gunicorn on 127.0.0.1:8000.
server {
    listen 80;
    server_name yourdomain.com www.yourdomain.com;

    # Redirect HTTP to HTTPS. $host keeps the hostname the client asked for
    # ($server_name would always redirect to the first name listed above).
    return 301 https://$host$request_uri;
}

server {
    listen 443 ssl http2;
    server_name yourdomain.com www.yourdomain.com;

    # TLS certificate
    ssl_certificate /etc/letsencrypt/live/yourdomain.com/fullchain.pem;
    ssl_certificate_key /etc/letsencrypt/live/yourdomain.com/privkey.pem;

    # TLS settings. AES-GCM suites are paired with SHA256/SHA384; the
    # "...-GCM-SHA512" names are not real OpenSSL cipher suites.
    ssl_protocols TLSv1.2 TLSv1.3;
    ssl_ciphers ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305;
    ssl_prefer_server_ciphers off;
    ssl_session_cache shared:SSL:10m;
    ssl_session_timeout 10m;

    # Security headers
    # NOTE: a location that declares its own add_header does not inherit these.
    add_header X-Frame-Options "SAMEORIGIN" always;
    add_header X-XSS-Protection "1; mode=block" always;
    add_header X-Content-Type-Options "nosniff" always;
    add_header Referrer-Policy "no-referrer-when-downgrade" always;
    add_header Content-Security-Policy "default-src 'self' http: https: data: blob: 'unsafe-inline'" always;

    # Upload size limit
    client_max_body_size 50M;

    # Static files. "^~" stops nginx from evaluating regex locations once
    # this prefix matches.
    location ^~ /static/ {
        alias /var/www/flaskapp/app/static/;
        expires 1y;
        add_header Cache-Control "public, immutable";

        # Compression: serve pre-compressed .gz files when present, otherwise
        # compress on the fly (gzip_types has no effect unless gzip is on).
        gzip_static on;
        gzip on;
        gzip_types text/plain text/css application/json application/javascript text/xml application/xml application/xml+rss text/javascript;
    }

    location ^~ /uploads/ {
        alias /var/www/flaskapp/uploads/;
        expires 30d;
        add_header Cache-Control "public";

        # Only reachable through internal redirects (e.g. X-Accel-Redirect)
        internal;
    }

    # The former catch-all regex location for *.jpg/*.css/*.js was removed:
    # regex locations win over plain prefix locations, so it intercepted
    # /static/ requests (and any such URL meant for Flask) and tried to
    # serve them from nginx's default root, returning 404.

    # Reverse proxy to Gunicorn
    location / {
        proxy_pass http://127.0.0.1:8000;

        # Forwarded request information
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;
        proxy_set_header X-Forwarded-Host $server_name;

        # Timeouts
        proxy_connect_timeout 75s;
        proxy_send_timeout 3600s;
        proxy_read_timeout 3600s;

        # Buffering
        proxy_buffering on;
        proxy_buffer_size 128k;
        proxy_buffers 256 16k;
        proxy_busy_buffers_size 256k;
        proxy_temp_file_write_size 256k;

        # WebSocket support
        proxy_http_version 1.1;
        proxy_set_header Upgrade $http_upgrade;
        proxy_set_header Connection "upgrade";
    }

    # Health-check endpoint (not written to the access log)
    location /health {
        proxy_pass http://127.0.0.1:8000;
        access_log off;
    }

    # Deny access to dotfiles, except /.well-known (used for ACME challenges)
    location ~ /\.(?!well-known) {
        deny all;
    }

    location ~ /\.ht {
        deny all;
    }

    # Error pages
    error_page 500 502 503 504 /50x.html;
    location = /50x.html {
        root /usr/share/nginx/html;
    }
}
# Obtain a TLS certificate with Let's Encrypt
sudo apt install -y certbot python3-certbot-nginx

# Request the certificate only ("certonly"). The site file already contains
# the ssl_certificate directives, and it is not enabled yet, so there is no
# server block for certbot to install into at this point.
sudo certbot certonly --nginx -d yourdomain.com -d www.yourdomain.com

# Check that automatic renewal works
sudo certbot renew --dry-run

# Enable the site now that the certificate files exist.
# -f keeps the step re-runnable when the symlink is already present.
sudo ln -sf /etc/nginx/sites-available/flaskapp /etc/nginx/sites-enabled/
sudo nginx -t  # validate the configuration
sudo systemctl reload nginx

容器化部署(Docker)

1. Docker多阶段构建

多阶段构建优势

减少镜像体积,提高安全性,优化构建缓存,适合生产环境。

# Dockerfile - production image (multi-stage build)
# Stage 1: build the virtual environment with compilers available
FROM python:3.11-slim AS builder

WORKDIR /app

# Build-time system dependencies
RUN apt-get update && apt-get install -y \
    gcc \
    g++ \
    libpq-dev \
    libjpeg-dev \
    libpng-dev \
    libfreetype6-dev \
    && rm -rf /var/lib/apt/lists/*

# Create the virtual environment
RUN python -m venv /opt/venv
ENV PATH="/opt/venv/bin:$PATH"

# Install Python dependencies. The whole requirements/ directory is copied
# because production.txt includes base.txt through "-r base.txt".
COPY requirements/ requirements/
RUN pip install --no-cache-dir -r requirements/production.txt

# Stage 2: runtime image without compilers
FROM python:3.11-slim

# Runtime system dependencies (curl is used by the health check)
RUN apt-get update && apt-get install -y \
    libpq5 \
    libjpeg62-turbo \
    libpng16-16 \
    libfreetype6 \
    curl \
    && rm -rf /var/lib/apt/lists/*

# Create an unprivileged user
RUN groupadd -r flask && useradd -r -g flask flask

# Application directory
WORKDIR /app

# Copy the virtual environment from the build stage
COPY --from=builder /opt/venv /opt/venv
ENV PATH="/opt/venv/bin:$PATH"

# Copy the application code
COPY . .

# Create writable directories, then hand /app over to the unprivileged user
RUN mkdir -p logs uploads static && chown -R flask:flask /app
USER flask

# Environment
ENV FLASK_APP=app:create_app()
ENV FLASK_ENV=production
ENV PYTHONUNBUFFERED=1
ENV PYTHONPATH=/app

# Port served by Gunicorn
EXPOSE 8000

# Health check
HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
    CMD curl -f http://localhost:8000/health || exit 1

# Start command. Command-line flags override gunicorn_config.py:
#  - bind to 0.0.0.0 so other containers and published ports can reach the
#    app (the config file binds to 127.0.0.1, unreachable from outside);
#  - log to stdout/stderr, since /var/log/gunicorn does not exist in the image.
CMD ["gunicorn", "--config", "gunicorn_config.py", "--bind", "0.0.0.0:8000", "--access-logfile", "-", "--error-logfile", "-", "app:create_app()"]

2. Docker Compose生产配置

# docker-compose.prod.yml
# Production stack: Flask app (web), PostgreSQL (db), Redis, Nginx and certbot.
# Only Nginx is reachable from outside; web, db and redis publish their ports
# on the host's loopback interface only. Docker-published ports bypass host
# firewalls such as ufw, so never publish databases on all interfaces.
version: '3.8'

services:
  web:
    build:
      context: .
      dockerfile: Dockerfile
      args:
        - ENVIRONMENT=production
    image: yourregistry.com/flaskapp:${TAG:-latest}
    container_name: flaskapp-web
    restart: unless-stopped
    depends_on:
      db:
        condition: service_healthy
      redis:
        condition: service_healthy
    environment:
      - FLASK_ENV=production
      - DATABASE_URL=postgresql://${DB_USER}:${DB_PASSWORD}@db:5432/${DB_NAME}
      - REDIS_URL=redis://redis:6379/0
      - SECRET_KEY=${SECRET_KEY}
      - MAIL_USERNAME=${MAIL_USERNAME}
      - MAIL_PASSWORD=${MAIL_PASSWORD}
    env_file:
      - .env.production
    volumes:
      - uploads:/app/uploads
      - logs:/app/logs
      - static:/app/app/static
    ports:
      # Loopback only: public traffic must go through nginx
      - "127.0.0.1:8000:8000"
    networks:
      - app-network
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 40s
    deploy:
      resources:
        limits:
          memory: 512M
        reservations:
          memory: 256M
    logging:
      driver: "json-file"
      options:
        max-size: "10m"
        max-file: "3"

  db:
    image: postgres:15-alpine
    container_name: flaskapp-db
    restart: unless-stopped
    environment:
      POSTGRES_DB: ${DB_NAME}
      POSTGRES_USER: ${DB_USER}
      POSTGRES_PASSWORD: ${DB_PASSWORD}
    volumes:
      - postgres_data:/var/lib/postgresql/data
      - ./docker/postgres/init.sql:/docker-entrypoint-initdb.d/init.sql
    ports:
      # Loopback only: for local administration, never exposed publicly
      - "127.0.0.1:5432:5432"
    networks:
      - app-network
    healthcheck:
      # -d avoids probing a database named after the user
      test: ["CMD-SHELL", "pg_isready -U ${DB_USER} -d ${DB_NAME}"]
      interval: 10s
      timeout: 5s
      retries: 5
    command: >
      postgres
      -c max_connections=100
      -c shared_buffers=256MB
      -c effective_cache_size=1GB

  redis:
    image: redis:7-alpine
    container_name: flaskapp-redis
    restart: unless-stopped
    command: redis-server --appendonly yes --maxmemory 256mb --maxmemory-policy allkeys-lru
    volumes:
      - redis_data:/data
    ports:
      # Loopback only: this Redis has no password configured
      - "127.0.0.1:6379:6379"
    networks:
      - app-network
    healthcheck:
      test: ["CMD", "redis-cli", "ping"]
      interval: 10s
      timeout: 3s
      retries: 3

  nginx:
    image: nginx:alpine
    container_name: flaskapp-nginx
    restart: unless-stopped
    depends_on:
      - web
    ports:
      - "80:80"
      - "443:443"
    volumes:
      - ./docker/nginx/nginx.conf:/etc/nginx/nginx.conf
      - ./docker/nginx/ssl:/etc/nginx/ssl
      # ACME challenge files written by certbot; nginx.conf serves them from
      # /var/www/certbot, so the same directory must be mounted here.
      - ./docker/nginx/webroot:/var/www/certbot:ro
      - static:/var/www/static:ro
      - uploads:/var/www/uploads:ro
      - ./logs/nginx:/var/log/nginx
    networks:
      - app-network
    healthcheck:
      test: ["CMD", "nginx", "-t"]
      interval: 30s
      timeout: 10s
      retries: 3

  certbot:
    image: certbot/certbot
    container_name: flaskapp-certbot
    volumes:
      - ./docker/nginx/ssl:/etc/letsencrypt
      - ./docker/nginx/webroot:/var/www/certbot
    depends_on:
      - nginx
    networks:
      - app-network
    # --keep-until-expiring only requests a new certificate when needed;
    # forcing a renewal on every start quickly hits Let's Encrypt rate limits.
    command: certonly --webroot -w /var/www/certbot --keep-until-expiring --non-interactive --email ${CERTBOT_EMAIL} -d ${DOMAIN_NAME} --agree-tos

volumes:
  postgres_data:
    driver: local
  redis_data:
    driver: local
  uploads:
    driver: local
  logs:
    driver: local
  static:
    driver: local

networks:
  app-network:
    driver: bridge
    ipam:
      config:
        - subnet: 172.20.0.0/16

3. Docker生产环境Nginx配置

# docker/nginx/nginx.conf
# Main nginx configuration for the containerised deployment.
#
# IMPORTANT: nginx does not expand shell-style ${DOMAIN_NAME} placeholders.
# Render this file before mounting it, for example:
#   envsubst '${DOMAIN_NAME}' < nginx.conf.template > docker/nginx/nginx.conf
events {
    worker_connections 1024;
}

http {
    include /etc/nginx/mime.types;
    default_type application/octet-stream;

    # Log format
    log_format main '$remote_addr - $remote_user [$time_local] "$request" '
                    '$status $body_bytes_sent "$http_referer" '
                    '"$http_user_agent" "$http_x_forwarded_for"';

    access_log /var/log/nginx/access.log main;
    error_log /var/log/nginx/error.log warn;

    # Performance
    sendfile on;
    tcp_nopush on;
    tcp_nodelay on;
    keepalive_timeout 65;
    types_hash_max_size 2048;
    client_max_body_size 50M;

    # Gzip compression
    gzip on;
    gzip_vary on;
    gzip_min_length 1024;
    gzip_proxied any;
    gzip_comp_level 6;
    gzip_types text/plain text/css text/xml text/javascript
               application/json application/javascript application/xml+rss
               application/xml application/xhtml+xml image/svg+xml;

    # Upstream application ("web" is the Compose service name)
    upstream flaskapp {
        server web:8000;
        keepalive 32;
    }

    # HTTP server (ACME challenges, everything else redirected to HTTPS)
    server {
        listen 80;
        server_name ${DOMAIN_NAME} www.${DOMAIN_NAME};

        # Let's Encrypt HTTP-01 validation
        location /.well-known/acme-challenge/ {
            root /var/www/certbot;
        }

        # Redirect to HTTPS, keeping the hostname the client requested
        location / {
            return 301 https://$host$request_uri;
        }
    }

    # HTTPS server
    server {
        listen 443 ssl http2;
        server_name ${DOMAIN_NAME} www.${DOMAIN_NAME};

        # TLS certificate
        ssl_certificate /etc/nginx/ssl/live/${DOMAIN_NAME}/fullchain.pem;
        ssl_certificate_key /etc/nginx/ssl/live/${DOMAIN_NAME}/privkey.pem;

        # TLS settings. AES-GCM suites are paired with SHA256/SHA384; the
        # "...-GCM-SHA512" names are not real OpenSSL cipher suites.
        ssl_protocols TLSv1.2 TLSv1.3;
        ssl_ciphers ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305;
        ssl_prefer_server_ciphers off;
        ssl_session_cache shared:SSL:10m;
        ssl_session_timeout 10m;
        ssl_session_tickets off;

        # Security headers
        add_header Strict-Transport-Security "max-age=31536000; includeSubDomains" always;
        add_header X-Frame-Options "SAMEORIGIN" always;
        add_header X-Content-Type-Options "nosniff" always;
        add_header X-XSS-Protection "1; mode=block" always;
        add_header Referrer-Policy "strict-origin-when-cross-origin" always;

        # Static files
        location /static/ {
            alias /var/www/static/;
            expires 1y;
            add_header Cache-Control "public, immutable";
        }

        location /uploads/ {
            alias /var/www/uploads/;
            expires 30d;
            add_header Cache-Control "public";

            # Only reachable through internal redirects (e.g. X-Accel-Redirect)
            internal;
        }

        # Reverse proxy to the Flask application
        location / {
            proxy_pass http://flaskapp;

            proxy_set_header Host $host;
            proxy_set_header X-Real-IP $remote_addr;
            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
            proxy_set_header X-Forwarded-Proto $scheme;
            proxy_set_header X-Forwarded-Host $server_name;

            proxy_connect_timeout 75s;
            proxy_send_timeout 3600s;
            proxy_read_timeout 3600s;

            proxy_buffer_size 128k;
            proxy_buffers 256 16k;
            proxy_busy_buffers_size 256k;

            proxy_http_version 1.1;
            proxy_set_header Upgrade $http_upgrade;
            proxy_set_header Connection "upgrade";
        }

        # Health check (not written to the access log)
        location /health {
            proxy_pass http://flaskapp;
            access_log off;
        }
    }
}

云平台部署

1. Heroku部署

Heroku部署配置
简单 流行
# Procfile - Heroku process definition
# The command runs through a shell, so the factory call must be quoted;
# unquoted parentheses are a shell syntax error.
web: gunicorn "app:create_app()"

# runtime.txt - Python version
python-3.11.5

# requirements.txt - dependency file (detected automatically by Heroku)

# app.json - Heroku应用描述
{
  "name": "Flask Application",
  "description": "A production-ready Flask application",
  "repository": "https://github.com/yourusername/flaskapp",
  "keywords": ["python", "flask", "gunicorn"],
  "env": {
    "FLASK_ENV": {
      "value": "production"
    },
    "SECRET_KEY": {
      "generator": "secret"
    }
  },
  "addons": [
    "heroku-postgresql:essential-0",
    "heroku-redis:mini"
  ],
  "buildpacks": [
    {
      "url": "heroku/python"
    }
  ]
}

# Deployment commands
# 1. Install the Heroku CLI
curl https://cli-assets.heroku.com/install.sh | sh

# 2. Log in
heroku login

# 3. Create the app and attach the add-ons
#    (the free "hobby-dev" plans were retired; use the current entry plans)
heroku create flaskapp-production
heroku addons:create heroku-postgresql:essential-0
heroku addons:create heroku-redis:mini

# 4. Set configuration variables
heroku config:set FLASK_ENV=production
heroku config:set SECRET_KEY="$(openssl rand -base64 32)"
# DATABASE_URL and REDIS_URL are set and managed by the add-ons attached
# above; do not set them by hand (Heroku rejects overwriting them).
heroku config:get DATABASE_URL
heroku config:get REDIS_URL

# 5. Deploy the code
git push heroku main

# 6. Tail the logs
heroku logs --tail

# 7. Open the app
heroku open

2. AWS Elastic Beanstalk部署

# .ebextensions/01-packages.config - install system packages
packages:
  yum:
    postgresql-devel: []
    gcc: []
    python3-devel: []

# .ebextensions/02-python.config - Python platform settings
option_settings:
  aws:elasticbeanstalk:application:environment:
    FLASK_ENV: production
    PYTHONPATH: /var/app/current:$PYTHONPATH
  aws:elasticbeanstalk:container:python:
    # module:callable - application.py exposes a callable named
    # "application" (there is no "app" in that module).
    WSGIPath: application:application
    NumProcesses: 3
    NumThreads: 20

# .ebextensions/03-nginx.config - Nginx settings
# NOTE(review): on Amazon Linux 2 / 2023 platforms, proxy snippets are
# expected under .platform/nginx/conf.d/ - confirm for your platform version.
files:
  "/etc/nginx/conf.d/proxy.conf":
    mode: "000644"
    owner: root
    group: root
    content: |
      client_max_body_size 50M;

# requirements.txt - 依赖文件

# application.py - 应用入口
from app import create_app

# WSGI callable built by the application factory.
application = create_app()

# Only when this file is run directly: start the app via application.run().
if __name__ == "__main__":
    application.run()

# Deployment commands
# 1. Install the EB CLI
pip install awsebcli

# 2. Initialise the project
eb init -p python-3.11 flaskapp

# 3. Create the environment
#    --single creates one instance without a load balancer, so the former
#    --vpc.elbpublic flag (which places a load balancer) was contradictory
#    and has been dropped.
#    NOTE: replace the placeholder password and secret key. Values passed on
#    the command line end up in shell history; prefer a secrets store.
eb create flaskapp-production \
  --single \
  --instance-types t3.small \
  --database \
  --database.username flaskuser \
  --database.password securepassword \
  --database.engine postgres \
  --envvars FLASK_ENV=production,SECRET_KEY=your-secret-key

# 4. Deploy
eb deploy

# 5. Check status and logs
eb status
eb logs

3. Google Cloud Run部署

# Dockerfile for Cloud Run
FROM python:3.11-slim

WORKDIR /app

# Install dependencies. The whole requirements/ directory is copied because
# production.txt includes base.txt through "-r base.txt".
COPY requirements/ requirements/
RUN pip install --no-cache-dir -r requirements/production.txt gunicorn

# Copy the application code
COPY . .

# Environment (Cloud Run tells the container which port to use via PORT)
ENV FLASK_ENV=production
ENV PORT=8080

# Run the app. This is the shell form of CMD (needed to expand $PORT), so
# the factory call must be quoted: unquoted parentheses are a shell syntax
# error. --timeout 0 disables Gunicorn's own worker timeout.
CMD exec gunicorn --bind :$PORT --workers 1 --threads 8 --timeout 0 "app:create_app()"

# cloudbuild.yaml - Cloud Build pipeline: build, push, deploy to Cloud Run
steps:
  # Build the Docker image
  - name: 'gcr.io/cloud-builders/docker'
    args: ['build', '-t', 'gcr.io/$PROJECT_ID/flaskapp:$COMMIT_SHA', '.']

  # Push it to Container Registry
  - name: 'gcr.io/cloud-builders/docker'
    args: ['push', 'gcr.io/$PROJECT_ID/flaskapp:$COMMIT_SHA']

  # Deploy to Cloud Run
  - name: 'gcr.io/cloud-builders/gcloud'
    args:
      - 'run'
      - 'deploy'
      - 'flaskapp'
      - '--image'
      - 'gcr.io/$PROJECT_ID/flaskapp:$COMMIT_SHA'
      - '--platform'
      - 'managed'
      - '--region'
      - 'us-central1'
      - '--allow-unauthenticated'
      - '--memory'
      - '512Mi'
      - '--cpu'
      - '1'
      - '--max-instances'
      - '10'
      - '--set-env-vars'
      - 'FLASK_ENV=production'
      # Let Cloud Run read SECRET_KEY from Secret Manager at runtime. The
      # previous "${_SECRET_KEY}" was a user-defined substitution (never
      # defined here), not the secret, and would have put the value in the
      # service's plain environment configuration.
      # The Cloud Run service account needs the Secret Manager Secret
      # Accessor role on this secret.
      - '--set-secrets'
      - 'SECRET_KEY=SECRET_KEY:latest'

images:
  - 'gcr.io/$PROJECT_ID/flaskapp:$COMMIT_SHA'

Kubernetes部署

1. Kubernetes部署配置

Kubernetes优势

自动扩展、自我修复、服务发现、负载均衡,适合大规模生产环境。

# k8s/namespace.yaml
# Dedicated namespace used by the other flaskapp manifests in this chapter.
apiVersion: v1
kind: Namespace
metadata:
  name: flaskapp
  labels:
    name: flaskapp

# k8s/configmap.yaml
# Non-secret configuration, injected into the pods as environment variables.
apiVersion: v1
kind: ConfigMap
metadata:
  name: flaskapp-config
  namespace: flaskapp
data:
  # Application settings
  FLASK_ENV: "production"
  LOG_LEVEL: "INFO"
  CORS_ORIGINS: "https://yourdomain.com"

  # Database settings
  DB_HOST: "postgres-service"
  DB_PORT: "5432"
  DB_NAME: "flaskapp_production"

  # Redis settings
  REDIS_HOST: "redis-service"
  REDIS_PORT: "6379"

  # Gunicorn settings
  WORKERS: "4"
  THREADS: "2"
  TIMEOUT: "30"

# k8s/secret.yaml
# Sensitive settings. The values below are placeholders: replace them and
# keep the real manifest out of version control.
apiVersion: v1
kind: Secret
metadata:
  name: flaskapp-secrets
  namespace: flaskapp
type: Opaque
# stringData takes plain-text values
stringData:
  SECRET_KEY: "production-secret-key"
  DB_USER: "flaskuser"
  DB_PASSWORD: "production-db-password"
  MAIL_PASSWORD: "mail-app-password"
  STRIPE_SECRET_KEY: "sk_live_xxxxxxxxxxxxxx"

# k8s/deployment.yaml
# Runs the Flask application as three replicas with zero-downtime rolling
# updates, health probes and a locked-down security context.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: flaskapp-deployment
  namespace: flaskapp
  labels:
    app: flaskapp
    tier: web
spec:
  replicas: 3
  selector:
    matchLabels:
      app: flaskapp
      tier: web
  strategy:
    type: RollingUpdate
    rollingUpdate:
      maxSurge: 1
      maxUnavailable: 0
  template:
    metadata:
      labels:
        app: flaskapp
        tier: web
    spec:
      containers:
      - name: flaskapp
        image: yourregistry.com/flaskapp:latest
        imagePullPolicy: Always
        # Explicit start command so the pod works with a read-only root
        # filesystem and is reachable by probes and the Service:
        #  - bind to 0.0.0.0 (gunicorn_config.py binds to 127.0.0.1);
        #  - log to stdout/stderr instead of files under /var/log;
        #  - keep worker heartbeat files on the in-memory /dev/shm.
        command:
        - gunicorn
        - --config
        - gunicorn_config.py
        - --bind
        - 0.0.0.0:8000
        - --access-logfile
        - "-"
        - --error-logfile
        - "-"
        - --worker-tmp-dir
        - /dev/shm
        - app:create_app()
        ports:
        - containerPort: 8000
          name: http
        env:
        - name: SECRET_KEY
          valueFrom:
            secretKeyRef:
              name: flaskapp-secrets
              key: SECRET_KEY
        # $(VAR) references are filled from the ConfigMap/Secret loaded below
        - name: DATABASE_URL
          value: "postgresql://$(DB_USER):$(DB_PASSWORD)@$(DB_HOST):$(DB_PORT)/$(DB_NAME)"
        envFrom:
        - configMapRef:
            name: flaskapp-config
        - secretRef:
            name: flaskapp-secrets
        resources:
          requests:
            memory: "256Mi"
            cpu: "250m"
          limits:
            memory: "512Mi"
            cpu: "500m"
        livenessProbe:
          httpGet:
            path: /health
            port: 8000
            scheme: HTTP
          initialDelaySeconds: 30
          periodSeconds: 10
          timeoutSeconds: 5
          failureThreshold: 3
        readinessProbe:
          httpGet:
            path: /health
            port: 8000
            scheme: HTTP
          initialDelaySeconds: 5
          periodSeconds: 5
          timeoutSeconds: 3
          failureThreshold: 1
        volumeMounts:
        - name: uploads-volume
          mountPath: /app/uploads
        - name: logs-volume
          mountPath: /app/logs
        # Writable scratch space; the root filesystem is read-only
        - name: tmp-volume
          mountPath: /tmp
        securityContext:
          runAsUser: 1000
          runAsGroup: 1000
          readOnlyRootFilesystem: true
          allowPrivilegeEscalation: false
      volumes:
      - name: uploads-volume
        # With several replicas this claim must support shared access
        persistentVolumeClaim:
          claimName: flaskapp-uploads-pvc
      - name: logs-volume
        emptyDir: {}
      - name: tmp-volume
        emptyDir: {}
      securityContext:
        runAsNonRoot: true
        seccompProfile:
          type: RuntimeDefault

2. Kubernetes服务和入口

# k8s/service.yaml
# Cluster-internal Service: forwards port 80 to port 8000 on pods labelled
# app=flaskapp, tier=web.
apiVersion: v1
kind: Service
metadata:
  name: flaskapp-service
  namespace: flaskapp
  labels:
    app: flaskapp
spec:
  selector:
    app: flaskapp
    tier: web
  ports:
  - port: 80
    targetPort: 8000
    protocol: TCP
    name: http
  type: ClusterIP

# k8s/ingress.yaml
# Exposes the Service at https://yourdomain.com through the NGINX ingress
# controller, with a certificate issued by cert-manager.
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: flaskapp-ingress
  namespace: flaskapp
  annotations:
    nginx.ingress.kubernetes.io/ssl-redirect: "true"
    nginx.ingress.kubernetes.io/force-ssl-redirect: "true"
    nginx.ingress.kubernetes.io/proxy-body-size: "50m"
    nginx.ingress.kubernetes.io/proxy-read-timeout: "3600"
    nginx.ingress.kubernetes.io/proxy-send-timeout: "3600"
    cert-manager.io/cluster-issuer: "letsencrypt-prod"
spec:
  # Replaces the deprecated kubernetes.io/ingress.class annotation; requires
  # an IngressClass named "nginx" (created by the ingress-nginx installation).
  ingressClassName: nginx
  tls:
  - hosts:
    - yourdomain.com
    secretName: flaskapp-tls
  rules:
  - host: yourdomain.com
    http:
      paths:
      - path: /
        pathType: Prefix
        backend:
          service:
            name: flaskapp-service
            port:
              number: 80

# k8s/hpa.yaml - horizontal pod autoscaling
# Scales flaskapp-deployment between 3 and 10 replicas, targeting 70% average
# CPU utilisation and 80% average memory utilisation.
# NOTE(review): the Deployment also sets "replicas: 3"; re-applying it may
# reset the replica count chosen by the autoscaler - confirm the workflow.
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: flaskapp-hpa
  namespace: flaskapp
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: flaskapp-deployment
  minReplicas: 3
  maxReplicas: 10
  metrics:
  - type: Resource
    resource:
      name: cpu
      target:
        type: Utilization
        averageUtilization: 70
  - type: Resource
    resource:
      name: memory
      target:
        type: Utilization
        averageUtilization: 80

部署后工作

1. 监控和日志

监控至关重要!

没有监控的应用就像在黑暗中开车,你不知道何时会出问题。

# monitoring.py - 应用监控配置
import logging
from logging.handlers import RotatingFileHandler, SMTPHandler
import sentry_sdk
from sentry_sdk.integrations.flask import FlaskIntegration
from prometheus_flask_exporter import PrometheusMetrics

def setup_monitoring(app):
    """Configure error reporting, metrics and logging for ``app``.

    * Sentry is initialised when ``SENTRY_DSN`` is configured.
    * Prometheus metrics are exported through ``PrometheusMetrics`` and a
      custom per-endpoint request counter is incremented before each request.
    * Outside debug mode, a rotating file handler is attached and, when
      ``MAIL_SERVER`` and ``ADMIN_EMAIL`` are configured, an SMTP handler
      that mails records of level ERROR and above.

    Args:
        app: The Flask application to instrument.
    """
    # Local imports: the block uses these names but the module's import
    # section does not provide them. prometheus_client is the library
    # underneath prometheus_flask_exporter.
    from flask import request
    from prometheus_client import Counter

    # Accept "info"/"INFO"/..., and fall back to INFO on an unknown name
    # instead of crashing at start-up.
    level_name = str(app.config.get('LOG_LEVEL', 'INFO')).upper()
    log_level = getattr(logging, level_name, logging.INFO)

    # === Sentry error monitoring ===
    if app.config.get('SENTRY_DSN'):
        sentry_sdk.init(
            dsn=app.config['SENTRY_DSN'],
            integrations=[FlaskIntegration()],
            environment=app.config.get('ENVIRONMENT', 'production'),
            release=app.config.get('VERSION', '1.0.0'),
            traces_sample_rate=0.1,
            profiles_sample_rate=0.1,
        )
        app.logger.info('Sentry监控已启用')

    # === Prometheus metrics ===
    # NOTE(review): PrometheusMetrics appears to register its own /metrics
    # endpoint by default; if the app also defines a protected /metrics
    # route, confirm which one actually serves requests.
    metrics = PrometheusMetrics(app)

    # Custom metric. metrics.counter() returns a view decorator, not a
    # counter object, so calling .inc() on its result fails; a real Counter
    # registered in the exporter's registry is used instead.
    requests_counter = Counter(
        'flaskapp_requests_total',
        'Total number of requests',
        ['endpoint'],
        registry=metrics.registry,
    )

    @app.before_request
    def before_request():
        # request.endpoint is None when no route matched the URL.
        requests_counter.labels(endpoint=str(request.endpoint)).inc()

    # === Logging ===
    if not app.debug:
        # Rotating file log
        file_handler = RotatingFileHandler(
            app.config.get('LOG_FILE', 'app.log'),
            maxBytes=10*1024*1024,  # 10MB
            backupCount=10
        )
        file_handler.setLevel(log_level)
        file_handler.setFormatter(logging.Formatter(
            '%(asctime)s %(levelname)s: %(message)s [in %(pathname)s:%(lineno)d]'
        ))
        app.logger.addHandler(file_handler)

        # E-mail notification for errors
        if app.config.get('MAIL_SERVER') and app.config.get('ADMIN_EMAIL'):
            mail_handler = SMTPHandler(
                mailhost=(app.config['MAIL_SERVER'], app.config['MAIL_PORT']),
                fromaddr=app.config['MAIL_DEFAULT_SENDER'],
                toaddrs=[app.config['ADMIN_EMAIL']],
                subject='应用错误通知',
                credentials=(app.config['MAIL_USERNAME'], app.config['MAIL_PASSWORD']),
                # An empty tuple enables TLS without a key/cert file
                secure=() if app.config.get('MAIL_USE_TLS') else None
            )
            mail_handler.setLevel(logging.ERROR)
            app.logger.addHandler(mail_handler)

    app.logger.setLevel(log_level)

# Health-check endpoint
@app.route('/health')
def health_check():
    """Run the dependency checks and report overall health as JSON.

    Responds with HTTP 200 when every check passes, 503 otherwise.
    """
    checks = {}
    checks['database'] = check_database()
    checks['redis'] = check_redis()
    checks['disk_space'] = check_disk_space()

    if all(checks.values()):
        status = 200
        label = 'healthy'
    else:
        status = 503
        label = 'unhealthy'

    payload = {
        'status': label,
        'timestamp': datetime.now().isoformat(),
        'checks': checks
    }
    return jsonify(payload), status

# Metrics endpoint
@app.route('/metrics')
@login_required
@admin_required
def metrics():
    """Return whatever ``generate_latest()`` produces, behind the login and admin decorators."""
    exposition = generate_latest()
    return exposition

def check_database():
    """Check database connectivity by running ``SELECT 1``.

    Returns:
        True when the query succeeds, False otherwise (the error is logged).
    """
    try:
        # SQLAlchemy 2.0 rejects plain strings as statements; wrap the SQL in
        # text(). Flask-SQLAlchemy exposes sqlalchemy's names on ``db``, so no
        # extra import is needed.
        db.session.execute(db.text('SELECT 1'))
        return True
    except Exception as e:
        app.logger.error(f'数据库检查失败: {e}')
        return False

def check_redis():
    """Check Redis connectivity by sending a PING.

    Returns True when ``redis_client.ping()`` does not raise; otherwise logs
    the exception and returns False.
    """
    try:
        redis_client.ping()
    except Exception as e:
        app.logger.error(f'Redis检查失败: {e}')
        return False
    else:
        return True

2. 备份策略

#!/bin/bash
# backup.sh - database and file backup script
#
# Dumps the PostgreSQL database, archives the uploads and Gunicorn log
# directories, optionally copies the archives to S3, then prunes local
# archives older than RETENTION_DAYS.

# Stop at the first failed command, unset variable or failed pipeline stage,
# so a broken dump never reaches the upload or the pruning step.
set -euo pipefail

# Configuration
BACKUP_DIR="/var/backups/flaskapp"
DATE=$(date +%Y%m%d_%H%M%S)
RETENTION_DAYS=30

# Create the backup directory
mkdir -p "$BACKUP_DIR"

# Database backup
DB_BACKUP_FILE="$BACKUP_DIR/db_backup_$DATE.sql"
echo "开始数据库备份..."
pg_dump -h localhost -U flask_user flask_production > "$DB_BACKUP_FILE"

# Compress the dump (gzip replaces the file with "$DB_BACKUP_FILE.gz")
gzip "$DB_BACKUP_FILE"

# Uploaded-files backup
UPLOADS_BACKUP_FILE="$BACKUP_DIR/uploads_backup_$DATE.tar.gz"
echo "开始上传文件备份..."
tar -czf "$UPLOADS_BACKUP_FILE" -C /var/www/flaskapp/uploads .

# Log-files backup.
# GNU tar exits with status 1 when a file changes while it is being read,
# which is expected for live log files; only higher statuses are fatal.
LOGS_BACKUP_FILE="$BACKUP_DIR/logs_backup_$DATE.tar.gz"
tar -czf "$LOGS_BACKUP_FILE" -C /var/log/gunicorn . || [ $? -eq 1 ]

# Upload to cloud storage (optional).
# ${VAR:-} keeps `set -u` from aborting when the variable is not defined.
if [ -n "${AWS_ACCESS_KEY_ID:-}" ]; then
    echo "上传备份到S3..."
    aws s3 cp "$DB_BACKUP_FILE.gz" s3://your-bucket/backups/db/
    aws s3 cp "$UPLOADS_BACKUP_FILE" s3://your-bucket/backups/uploads/
fi

# Remove old backups
find "$BACKUP_DIR" -name "*.gz" -type f -mtime +"$RETENTION_DAYS" -delete

echo "备份完成: $DATE"

# Add to crontab for automatic backups
# Run the backup every day at 02:00
# 0 2 * * * /path/to/backup.sh >> /var/log/backup.log 2>&1

3. 安全加固

#!/bin/bash
# security_hardening.sh - server security hardening
#
# Updates the system, enables the firewall, tightens SSH, configures
# fail2ban and turns on unattended security upgrades.
# WARNING: after this script only the `flaskapp` user can log in over SSH and
# only with a key - make sure that key is installed before running it.

# Abort on the first failing step instead of continuing half-configured.
set -euo pipefail

# Update the system
apt update && apt upgrade -y

# Install basic security tooling
apt install -y fail2ban ufw unattended-upgrades

# Configure the firewall
ufw default deny incoming
ufw default allow outgoing
ufw allow ssh
ufw allow http
ufw allow https
# --force skips the interactive confirmation prompt, which would otherwise
# block a non-interactive run.
ufw --force enable

# Harden SSH.
# Match the directive whether or not it is commented out and whatever its
# current value is, so the stock sshd_config lines are covered as well.
sed -i -E 's/^#?PasswordAuthentication[[:space:]].*/PasswordAuthentication no/' /etc/ssh/sshd_config
sed -i -E 's/^#?PermitRootLogin[[:space:]].*/PermitRootLogin no/' /etc/ssh/sshd_config
# Append the AllowUsers line only once so re-running the script is harmless.
grep -qxF "AllowUsers flaskapp" /etc/ssh/sshd_config || echo "AllowUsers flaskapp" >> /etc/ssh/sshd_config
# Validate the configuration before restarting; a broken config would lock
# everyone out of the server.
sshd -t
systemctl restart sshd

# Configure fail2ban
cat > /etc/fail2ban/jail.local << EOF
[sshd]
enabled = true
port = ssh
filter = sshd
logpath = /var/log/auth.log
maxretry = 3
bantime = 3600

[nginx-http-auth]
enabled = true
filter = nginx-http-auth
port = http,https
logpath = /var/log/nginx/error.log
maxretry = 3
bantime = 3600
EOF

systemctl enable fail2ban
systemctl start fail2ban

# Automatic security updates.
# The backslashes keep the shell from expanding ${distro_id} and
# ${distro_codename}; apt resolves them itself.
cat > /etc/apt/apt.conf.d/50unattended-upgrades << EOF
Unattended-Upgrade::Allowed-Origins {
    "\${distro_id}:\${distro_codename}-security";
    "\${distro_id}:\${distro_codename}-updates";
};
Unattended-Upgrade::AutoFixInterruptedDpkg "true";
Unattended-Upgrade::MinimalSteps "true";
Unattended-Upgrade::Remove-Unused-Dependencies "true";
Unattended-Upgrade::Automatic-Reboot "true";
Unattended-Upgrade::Automatic-Reboot-Time "02:00";
EOF

echo "安全加固完成!"

CI/CD管道

1. GitHub Actions配置

GitHub Actions完整配置
推荐 企业级
# .github/workflows/ci-cd.yml
# CI/CD workflow: runs on pushes to main/develop and on pull requests
# targeting main.
name: CI/CD Pipeline

# Trigger events
on:
  push:
    branches: [ main, develop ]
  pull_request:
    branches: [ main ]

# Workflow-wide values: container registry host and image name
# (the image name is the "owner/repo" of this repository).
env:
  REGISTRY: ghcr.io
  IMAGE_NAME: ${{ github.repository }}

jobs:
  # Code quality checks: flake8, black, isort and mypy
  lint:
    runs-on: ubuntu-latest
    steps:
    - uses: actions/checkout@v3

    - name: Set up Python
      uses: actions/setup-python@v4
      with:
        python-version: '3.11'

    # Only the linters are installed here, not the application requirements.
    - name: Install dependencies
      run: |
        python -m pip install --upgrade pip
        pip install flake8 black isort mypy

    - name: Lint with flake8
      run: |
        flake8 app tests --count --max-complexity=10 --statistics

    # --check reports formatting differences without rewriting files.
    - name: Check formatting with black
      run: |
        black --check app tests

    - name: Check imports with isort
      run: |
        isort --check-only app tests

    - name: Type check with mypy
      run: |
        mypy app

  # Tests: run pytest against PostgreSQL and Redis service containers
  test:
    runs-on: ubuntu-latest
    services:
      postgres:
        image: postgres:15
        env:
          POSTGRES_PASSWORD: postgres
          # Create the database that DATABASE_URL below points at; without
          # this the image only creates the default "postgres" database.
          POSTGRES_DB: test_db
        options: >-
          --health-cmd pg_isready
          --health-interval 10s
          --health-timeout 5s
          --health-retries 5
        ports:
          - 5432:5432

      redis:
        image: redis:7-alpine
        options: >-
          --health-cmd "redis-cli ping"
          --health-interval 10s
          --health-timeout 5s
          --health-retries 5
        ports:
          - 6379:6379

    steps:
    - uses: actions/checkout@v3

    - name: Set up Python
      uses: actions/setup-python@v4
      with:
        python-version: '3.11'

    - name: Install dependencies
      run: |
        python -m pip install --upgrade pip
        pip install -r requirements/development.txt

    # The services are reached through the ports mapped onto localhost above.
    - name: Run tests with pytest
      env:
        DATABASE_URL: postgresql://postgres:postgres@localhost:5432/test_db
        REDIS_URL: redis://localhost:6379/0
        SECRET_KEY: test-secret-key
      run: |
        pytest --cov=app --cov-report=xml --cov-report=html

    - name: Upload coverage to Codecov
      uses: codecov/codecov-action@v3
      with:
        file: ./coverage.xml
        flags: unittests

    # always() keeps the coverage artifacts even when the tests fail.
    - name: Archive test results
      uses: actions/upload-artifact@v3
      if: always()
      with:
        name: test-results
        path: |
          htmlcov/
          .coverage

  # Build and push the Docker image (main branch only, after lint and test)
  build:
    needs: [lint, test]
    runs-on: ubuntu-latest
    if: github.ref == 'refs/heads/main'
    # GITHUB_TOKEN needs explicit write access to packages to push to GHCR.
    permissions:
      contents: read
      packages: write

    steps:
    - uses: actions/checkout@v3

    - name: Set up Docker Buildx
      uses: docker/setup-buildx-action@v2

    - name: Log in to Container Registry
      uses: docker/login-action@v2
      with:
        registry: ${{ env.REGISTRY }}
        username: ${{ github.actor }}
        password: ${{ secrets.GITHUB_TOKEN }}

    # type=sha yields "sha-<short sha>"; the raw full-SHA tag is the one the
    # deploy job references as ":${{ github.sha }}".
    - name: Extract metadata
      id: meta
      uses: docker/metadata-action@v4
      with:
        images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
        tags: |
          type=ref,event=branch
          type=sha
          type=raw,value=${{ github.sha }}
          type=raw,value=latest

    # Layer cache is stored in the GitHub Actions cache backend.
    - name: Build and push
      uses: docker/build-push-action@v4
      with:
        context: .
        push: true
        tags: ${{ steps.meta.outputs.tags }}
        labels: ${{ steps.meta.outputs.labels }}
        cache-from: type=gha
        cache-to: type=gha,mode=max

  # Deploy to production (main branch only, after a successful build)
  # NOTE(review): no step in this job configures cluster credentials or a
  # kubeconfig; presumably that is expected to be added - confirm.
  deploy:
    needs: build
    runs-on: ubuntu-latest
    if: github.ref == 'refs/heads/main'

    steps:
    - uses: actions/checkout@v3

    # NOTE(review): the image is referenced by the full commit SHA; verify
    # that the build job pushes a tag with exactly that value.
    - name: Deploy to Kubernetes
      uses: azure/k8s-deploy@v4
      with:
        namespace: flaskapp
        manifests: |
          k8s/namespace.yaml
          k8s/configmap.yaml
          k8s/secret.yaml
          k8s/deployment.yaml
          k8s/service.yaml
          k8s/ingress.yaml
        images: |
          ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ github.sha }}
        kubectl-version: 'latest'

    # Wait up to 300s for the rollout to finish.
    - name: Verify deployment
      run: |
        kubectl rollout status deployment/flaskapp-deployment -n flaskapp --timeout=300s

    # Smoke test: curl -f fails the step on an HTTP error status.
    - name: Run smoke tests
      run: |
        # 执行冒烟测试
        curl -f https://yourdomain.com/health || exit 1

    - name: Notify on success
      if: success()
      uses: 8398a7/action-slack@v3
      with:
        channel: '#deployments'
        status: ${{ job.status }}
        author_name: GitHub Actions
      env:
        SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }}

    - name: Notify on failure
      if: failure()
      uses: 8398a7/action-slack@v3
      with:
        channel: '#alerts'
        status: ${{ job.status }}
        author_name: GitHub Actions
      env:
        SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }}

常见问题

根据项目需求选择部署方案:

项目类型 推荐方案 理由
个人项目/原型 Heroku / Railway 简单快速,无需运维
中小型网站 VPS + Docker 成本可控,灵活部署
企业级应用 Kubernetes 高可用,自动扩展
流量波动大 Serverless 按需付费,自动扩展
全球用户 多区域部署 + CDN 低延迟,高可用

决策树:

  1. 预算有限? → 选择VPS
  2. 想快速上线? → 选择PaaS
  3. 需要高可用? → 选择Kubernetes
  4. 流量不可预测? → 选择Serverless
  5. 有运维团队? → 选择自建Kubernetes

生产环境性能优化策略:

# performance_optimization.py
from flask import Flask
import gunicorn

app = Flask(__name__)

# === 1. Database optimization ===
# Use a connection pool: these options are stored under
# SQLALCHEMY_ENGINE_OPTIONS for the SQLAlchemy engine.
app.config['SQLALCHEMY_ENGINE_OPTIONS'] = {
    'pool_size': 20,
    'pool_recycle': 3600,
    'pool_pre_ping': True,
    'max_overflow': 10,
}

# 启用查询缓存
from sqlalchemy import event
from sqlalchemy.orm import Session

@event.listens_for(Session, 'do_orm_execute')
def receive_do_orm_execute(orm_execute_state):
    """Tag every ORM SELECT with a ``cache_key`` execution option.

    Statements that are not SELECTs are left untouched.
    NOTE(review): nothing in this snippet reads ``cache_key``; presumably a
    separate cache layer is meant to consume it - confirm.
    """
    if not orm_execute_state.is_select:
        return

    tagged_statement = orm_execute_state.statement.execution_options(
        cache_key="my_cache_key"
    )
    orm_execute_state.statement = tagged_statement

# === 2. 缓存优化 ===
from flask_caching import Cache

# Redis-backed cache settings; REDIS_URL must already be present in app.config
# (a plain [] lookup is used, so a missing key raises KeyError).
_cache_settings = {
    'CACHE_TYPE': 'redis',
    'CACHE_REDIS_URL': app.config['REDIS_URL'],
    'CACHE_DEFAULT_TIMEOUT': 300,
    'CACHE_KEY_PREFIX': 'flask_cache:',
}
cache = Cache(app, config=_cache_settings)

# View caching
@app.route('/expensive-operation')
@cache.cached(timeout=300)
def expensive_operation():
    """Return ``expensive_computation()``; the view is wrapped by ``cache.cached(timeout=300)``."""
    # Expensive operation
    return expensive_computation()

# === 3. 异步任务 ===
from celery import Celery

# Celery application named after the Flask app; broker and result backend
# URLs are read from app.config (plain [] lookups, so both keys must exist).
celery = Celery(
    app.name,
    broker=app.config['CELERY_BROKER_URL'],
    backend=app.config['CELERY_RESULT_BACKEND']
)

@celery.task
def send_email_async(to, subject, body):
    """Celery task that forwards its arguments to ``send_email``."""
    # Send the e-mail asynchronously
    send_email(to, subject, body)

# === 4. 静态文件优化 ===
# Use a CDN
@app.context_processor
def inject_cdn():
    """Expose ``CDN_URL`` to templates (empty string when not configured)."""
    cdn_url = app.config.get('CDN_URL', '')
    return dict(CDN_URL=cdn_url)

# 在模板中使用
# <img src="{{ CDN_URL }}/static/images/logo.png">

# === 5. Gunicorn配置优化 ===
# gunicorn_config.py
import multiprocessing

# Derive the worker count from the number of CPU cores
workers = multiprocessing.cpu_count() * 2 + 1

# Use an asynchronous worker class for higher concurrency
worker_class = 'gevent'
worker_connections = 1000

# Enable keep-alive
keepalive = 2

# === 6. 数据库查询优化 ===
@app.before_request
def before_request():
    """Record the request start time on ``flask.g`` for slow-request logging."""
    import time

    # Only `Flask` is imported at the top of this snippet, so `g` has to be
    # brought into scope here.
    from flask import g

    g.start_time = time.time()

@app.after_request
def after_request(response):
    """Log a warning for requests that took longer than one second.

    Args:
        response: the outgoing Flask response.

    Returns:
        The same response object, unchanged.
    """
    import time

    # Only `Flask` is imported at the top of this snippet.
    from flask import g, request

    # start_time is absent when no before_request hook stored it; skip the
    # measurement instead of failing the response.
    started_at = getattr(g, 'start_time', None)
    if started_at is not None:
        duration = time.time() - started_at
        if duration > 1.0:  # slower than one second
            app.logger.warning(f"慢请求: {request.path} - {duration:.2f}s")

    return response

# === 7. Enable compression ===
from flask_compress import Compress
# Attach Flask-Compress to the application.
Compress(app)

# === 8. 数据库索引优化 ===
# 确保常用查询字段有索引
# 定期分析查询性能
# 使用EXPLAIN ANALYZE分析查询计划

# === 9. Monitoring and tuning ===
@app.route('/performance')
@admin_required
def performance_metrics():
    """Return host and process resource figures as JSON.

    Collects memory, CPU, disk, open-file limit and process memory data via
    ``psutil`` and ``resource``.  ``cpu_percent(interval=1)`` samples for one
    second, so the request takes at least that long.
    """
    import psutil
    import resource

    # `jsonify` is not imported at the top of this snippet (only `Flask` is).
    from flask import jsonify

    metrics = {
        'memory': psutil.virtual_memory()._asdict(),
        'cpu_percent': psutil.cpu_percent(interval=1),
        'disk_usage': psutil.disk_usage('/')._asdict(),
        'open_files': resource.getrlimit(resource.RLIMIT_NOFILE),
        'process_memory': psutil.Process().memory_info()._asdict(),
    }

    return jsonify(metrics)

零停机部署策略:

# zero_downtime_deployment.py
from flask import Flask, request
import time
import signal
import sys
import threading

# Flask application instance the hooks and routes in this snippet attach to.
app = Flask(__name__)

class GracefulShutdown:
    """Tracks in-flight requests so the process can drain before exiting."""

    def __init__(self):
        self.lock = threading.Lock()
        self.active_requests = 0
        self.should_exit = False

    def _adjust(self, delta):
        """Add *delta* to the in-flight counter while holding the lock."""
        with self.lock:
            self.active_requests = self.active_requests + delta

    def increment(self):
        """Register one more in-flight request."""
        self._adjust(1)

    def decrement(self):
        """Register that one in-flight request has finished."""
        self._adjust(-1)

    def should_stop(self):
        """Tell whether shutdown was requested and no request is in flight."""
        with self.lock:
            drained = self.active_requests == 0
            return self.should_exit and drained


shutdown_manager = GracefulShutdown()

# Request-counting middleware
@app.before_request
def before_request():
    """Count the request as in-flight; reject it with 503 while draining.

    The counter is incremented before the drain check because Flask still
    runs the ``after_request`` hook (which decrements) for a response returned
    from here.  Incrementing only on the accept path would push the counter
    below zero, and ``should_stop()`` - which tests for exactly zero - would
    then never become true.
    """
    shutdown_manager.increment()

    if shutdown_manager.should_exit:
        return {'error': '服务器正在重启,请稍后重试'}, 503

@app.after_request
def after_request(response):
    """Mark one in-flight request as finished and pass the response through."""
    shutdown_manager.decrement()
    return response

# Graceful-shutdown signal handling
def handle_shutdown(signum, frame):
    """Start draining on a shutdown signal and exit once no request is active.

    The wait runs on a background thread.  A signal handler executes on the
    main thread, so sleeping inside it would freeze any request that thread
    is serving (the counter could then never reach zero) and could deadlock
    on ``shutdown_manager.lock`` if the signal arrived while it was held.

    Args:
        signum: number of the received signal (unused).
        frame: interrupted stack frame (unused).
    """
    import os

    if shutdown_manager.should_exit:
        # A drain is already in progress; do not start a second waiter.
        return

    print("收到关闭信号,开始优雅关闭...")
    shutdown_manager.should_exit = True

    def _drain_then_exit():
        # Wait for the active requests to finish.
        while not shutdown_manager.should_stop():
            print(f"等待 {shutdown_manager.active_requests} 个活跃请求完成...")
            time.sleep(1)

        print("所有请求处理完成,安全退出")
        sys.stdout.flush()
        # sys.exit() would only end this helper thread; os._exit() terminates
        # the whole process (note: atexit handlers are skipped).
        os._exit(0)

    threading.Thread(target=_drain_then_exit, daemon=True).start()

# Register the shutdown handler for SIGTERM and SIGINT
for _signum in (signal.SIGTERM, signal.SIGINT):
    signal.signal(_signum, handle_shutdown)

# Health-check endpoint (for the load balancer)
@app.route('/health')
def health_check():
    """Report readiness for the load balancer.

    Returns:
        ``({'status': 'draining'}, 503)`` while shutting down,
        ``({'status': 'unhealthy', 'checks': ...}, 503)`` when a dependency
        probe fails, otherwise ``({'status': 'healthy', 'checks': ...}, 200)``.
    """
    if shutdown_manager.should_exit:
        return {'status': 'draining'}, 503

    checks = {
        'database': check_database(),
        'redis': check_redis(),
    }

    # A failed probe must not be reported as healthy/200, otherwise the load
    # balancer keeps routing traffic to an instance that cannot serve it.
    if not all(checks.values()):
        return {'status': 'unhealthy', 'checks': checks}, 503

    return {'status': 'healthy', 'checks': checks}, 200

# === Docker健康检查配置 ===
# Dockerfile:
# HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
#   CMD curl -f http://localhost:5000/health || exit 1

# === Kubernetes就绪探针 ===
# readinessProbe:
#   httpGet:
#     path: /health
#     port: 5000
#   initialDelaySeconds: 5
#   periodSeconds: 5
#   failureThreshold: 3

# === 蓝绿部署策略 ===
# 1. 部署新版本到新环境(绿色)
# 2. 运行测试验证新版本
# 3. 切换流量到新版本
# 4. 监控新版本运行情况
# 5. 如果发现问题,快速回滚到旧版本(蓝色)

# === 金丝雀发布 ===
# 1. 部署新版本到少数实例
# 2. 将少量用户流量导向新版本
# 3. 监控新版本性能和错误率
# 4. 如果一切正常,逐步增加流量
# 5. 最终完全切换到新版本

# === Database migration strategy ===
def perform_database_migration():
    """Placeholder for a zero-downtime database migration (does nothing).

    Intended sequence:
      1. Make backward-compatible schema changes.
      2. Deploy an application version that supports both old and new schema.
      3. Migrate the data (using background tasks).
      4. Verify data consistency.
      5. Deploy a version that only uses the new schema.
      6. Clean up the old schema.
    """
    return None

# === Zero downtime with Nginx ===
# Nginx configuration:
# Proxy every request to the "backend" upstream group; on a connection error,
# timeout, invalid header or 500/502/503/504 reply the request is passed to
# the next server in the group.
location / {
    proxy_pass http://backend;
    proxy_next_upstream error timeout invalid_header http_500 http_502 http_503 http_504;
    proxy_connect_timeout 2s;
    proxy_read_timeout 30s;
}

# Upstream group with failure detection via max_fails / fail_timeout;
# backend3 is marked as a backup server.
upstream backend {
    server backend1:5000 max_fails=3 fail_timeout=30s;
    server backend2:5000 max_fails=3 fail_timeout=30s;
    server backend3:5000 max_fails=3 fail_timeout=30s backup;
}

生产环境监控体系:

# monitoring_system.py
import logging
from datetime import datetime
from dataclasses import dataclass
from typing import Dict, Any
from flask import Flask, request
import statsd
from prometheus_client import Counter, Histogram, generate_latest
import sentry_sdk

app = Flask(__name__)

# === 1. Application metrics ===
# Prometheus metrics
# Request counter, labelled by HTTP method, endpoint and status code.
REQUEST_COUNT = Counter(
    'flaskapp_requests_total',
    'Application Request Count',
    ['method', 'endpoint', 'http_status']
)

# Request latency histogram, labelled by HTTP method and endpoint.
REQUEST_LATENCY = Histogram(
    'flaskapp_request_latency_seconds',
    'Request latency',
    ['method', 'endpoint']
)

# Error counter, labelled by error type.
ERROR_COUNT = Counter(
    'flaskapp_errors_total',
    'Application Error Count',
    ['error_type']
)

# Database query duration histogram (no labels).
DATABASE_QUERY_DURATION = Histogram(
    'flaskapp_db_query_duration_seconds',
    'Database query duration'
)

# === 2. Request monitoring middleware ===
@app.before_request
def before_request():
    """Stamp the request with its start time and a fresh UUID4 request id."""
    # `uuid` is not imported at the top of this snippet; without this import
    # every request would fail with NameError.
    import uuid

    request.start_time = datetime.now()
    request.request_id = str(uuid.uuid4())

@app.after_request
def after_request(response):
    """Record request metrics and attach the request id to the response.

    Increments the request counter, observes the latency measured from
    ``request.start_time``, sets the ``X-Request-ID`` header and logs a
    warning when the request took longer than one second.
    """
    verb = request.method
    view_name = request.endpoint

    # Request count metric
    counter = REQUEST_COUNT.labels(
        method=verb,
        endpoint=view_name,
        http_status=response.status_code
    )
    counter.inc()

    # Latency metric
    latency = (datetime.now() - request.start_time).total_seconds()
    histogram = REQUEST_LATENCY.labels(method=verb, endpoint=view_name)
    histogram.observe(latency)

    # Propagate the request id in the response headers
    response.headers['X-Request-ID'] = request.request_id

    # Log slow requests (longer than one second)
    if latency > 1.0:
        app.logger.warning(f"慢请求: {request.path} - {latency:.2f}s")

    return response

@app.errorhandler(Exception)
def handle_exception(error):
    """Turn unexpected exceptions into a JSON 500 response.

    HTTP errors (404, 405, ...) are returned unchanged: a handler registered
    for ``Exception`` also receives them, and treating them as server errors
    would answer every missing page with a 500 and report it to Sentry.

    Args:
        error: the exception raised while handling the request.

    Returns:
        The original ``HTTPException`` for HTTP errors, otherwise a
        ``(body, 500)`` tuple.
    """
    from werkzeug.exceptions import HTTPException

    if isinstance(error, HTTPException):
        return error

    # Record the error metric
    ERROR_COUNT.labels(error_type=type(error).__name__).inc()

    # Send to Sentry
    sentry_sdk.capture_exception(error)

    # request_id is missing when the failure happened before the
    # before_request hook assigned it.
    request_id = getattr(request, 'request_id', None)

    # Return the error response
    return {'error': '服务器内部错误', 'request_id': request_id}, 500

# === 3. 数据库监控 ===
from sqlalchemy import event
from sqlalchemy.engine import Engine

@event.listens_for(Engine, "before_cursor_execute")
def before_cursor_execute(conn, cursor, statement, parameters, context, executemany):
    """Push the current time onto the connection's query start-time stack."""
    start_times = conn.info.setdefault('query_start_time', [])
    start_times.append(datetime.now())

@event.listens_for(Engine, "after_cursor_execute")
def after_cursor_execute(conn, cursor, statement, parameters, context, executemany):
    """Observe the query duration and warn about queries slower than 0.5s."""
    finished_at = datetime.now()
    started_at = conn.info['query_start_time'].pop(-1)
    total = (finished_at - started_at).total_seconds()
    DATABASE_QUERY_DURATION.observe(total)

    # Nothing more to do for fast queries.
    if total <= 0.5:
        return

    # Log the slow query (statement truncated to 200 characters)
    app.logger.warning(f"慢查询: {statement[:200]} - {total:.2f}s")

# === 4. Business metrics ===
# User registration counter (no labels).
USER_REGISTRATION_COUNT = Counter(
    'flaskapp_user_registrations_total',
    'User registration count'
)

# Order counter, labelled by status.
ORDER_COUNT = Counter(
    'flaskapp_orders_total',
    'Order count',
    ['status']
)

# API call counter, labelled by API name and status.
API_CALL_COUNT = Counter(
    'flaskapp_api_calls_total',
    'API call count',
    ['api_name', 'status']
)

# === 5. 系统监控 ===
import psutil
from threading import Thread
import time

class SystemMonitor(Thread):
    """Daemon thread that samples CPU, memory and disk usage once a minute."""

    def __init__(self, app):
        super().__init__(daemon=True)
        self.app = app

    def run(self):
        """Sample, log and forward system metrics forever."""
        while True:
            try:
                # CPU usage (sampled over one second)
                cpu_percent = psutil.cpu_percent(interval=1)
                # Memory usage
                memory = psutil.virtual_memory()
                # Disk usage of the root filesystem
                disk = psutil.disk_usage('/')

                # Write the sample to the application log
                self.app.logger.info(
                    f"系统指标: CPU={cpu_percent}%, "
                    f"内存={memory.percent}%, "
                    f"磁盘={disk.percent}%"
                )

                # Forward the sample to the monitoring system
                sample = {
                    'cpu_percent': cpu_percent,
                    'memory_percent': memory.percent,
                    'disk_percent': disk.percent,
                }
                self.send_to_monitoring(sample)
            except Exception as e:
                self.app.logger.error(f"系统监控错误: {e}")

            # Check once per minute, whether or not the sample succeeded
            time.sleep(60)

    def send_to_monitoring(self, metrics):
        """Forward *metrics* to the monitoring system (currently a no-op)."""
        # StatsD, Datadog, New Relic, etc. can be integrated here
        return None

# Start system monitoring (runs as soon as this module is imported)
monitor = SystemMonitor(app)
monitor.start()

# === 6. 日志聚合 ===
import structlog

def setup_structured_logging(app):
    """Configure structlog with a JSON renderer and install it as ``app.logger``."""
    # Processors run in this order; the JSON renderer is last.
    processor_chain = [
        structlog.stdlib.filter_by_level,
        structlog.stdlib.add_logger_name,
        structlog.stdlib.add_log_level,
        structlog.stdlib.PositionalArgumentsFormatter(),
        structlog.processors.TimeStamper(fmt="iso"),
        structlog.processors.StackInfoRenderer(),
        structlog.processors.format_exc_info,
        structlog.processors.UnicodeDecoder(),
        structlog.processors.JSONRenderer()
    ]

    structlog.configure(
        processors=processor_chain,
        context_class=dict,
        logger_factory=structlog.stdlib.LoggerFactory(),
        wrapper_class=structlog.stdlib.BoundLogger,
        cache_logger_on_first_use=True,
    )

    # Replace the Flask logger with the structlog logger
    app.logger = structlog.get_logger()

# === 7. Alert rules ===
@dataclass
class AlertRule:
    """A single alert rule evaluated by ``AlertManager``."""
    # Name reported in the alert record
    name: str
    # Predicate called with the metrics dict; a truthy result fires the alert
    condition: callable
    severity: str  # critical, warning, info
    # Channel names, e.g. "slack", "email", "sms"
    notification_channels: list

class AlertManager:
    """Holds alert rules and evaluates them against metric snapshots."""

    def __init__(self):
        self.rules = []

    def add_rule(self, rule: AlertRule):
        """Register *rule* for later evaluation."""
        self.rules.append(rule)

    def check_alerts(self, metrics: Dict[str, Any]):
        """Evaluate every rule against *metrics*.

        Returns a list with one record per triggered rule, in rule order; a
        notification is sent for each triggered rule.
        """
        triggered = []

        for rule in self.rules:
            if not rule.condition(metrics):
                continue

            record = {
                'rule': rule.name,
                'severity': rule.severity,
                'timestamp': datetime.now().isoformat(),
                'metrics': metrics
            }
            triggered.append(record)

            # Send the notification
            self.send_notification(rule, metrics)

        return triggered

    def send_notification(self, rule: AlertRule, metrics: Dict[str, Any]):
        """Send an alert notification (currently a no-op)."""
        # Intended channels: Slack, e-mail, SMS, phone call
        return None

# Configure the alert rules
alert_manager = AlertManager()
# Warning when CPU usage is above 80%.
alert_manager.add_rule(AlertRule(
    name="高CPU使用率",
    condition=lambda m: m.get('cpu_percent', 0) > 80,
    severity="warning",
    notification_channels=["slack", "email"]
))

# Critical when memory usage is above 90%.
alert_manager.add_rule(AlertRule(
    name="高内存使用率",
    condition=lambda m: m.get('memory_percent', 0) > 90,
    severity="critical",
    notification_channels=["slack", "email", "sms"]
))

# === 8. Monitoring endpoints ===
@app.route('/metrics')
def metrics():
    """Serve the Prometheus metrics exposition.

    ``generate_latest()`` returns raw bytes; returned bare, Flask labels the
    response ``text/html``.  The exposition content type published by
    ``prometheus_client`` is attached explicitly so scrapers can parse it.
    """
    # Local import: only Counter, Histogram and generate_latest are imported
    # at the top of this snippet.
    from prometheus_client import CONTENT_TYPE_LATEST

    return generate_latest(), 200, {'Content-Type': CONTENT_TYPE_LATEST}

@app.route('/health')
def health():
    """Health-check endpoint: always answers ``{'status': 'healthy'}`` with HTTP 200."""
    return {'status': 'healthy'}, 200

@app.route('/status')
@admin_required
def status():
    """Return application, system and request status as JSON.

    ``cpu_percent(interval=1)`` samples for one second, so the request takes
    at least that long.
    NOTE(review): ``shutdown_manager`` is not defined in this snippet;
    presumably it comes from the graceful-shutdown module - confirm.
    """
    import psutil

    # `jsonify` is not imported at the top of this snippet.  The unused,
    # Unix-only `resource` import was dropped so the endpoint does not fail
    # on platforms without that module.
    from flask import jsonify

    report = {
        'application': {
            'uptime': time.time() - psutil.Process().create_time(),
            'version': app.config.get('VERSION', '1.0.0'),
            'environment': app.config.get('FLASK_ENV', 'production'),
        },
        'system': {
            'cpu_percent': psutil.cpu_percent(interval=1),
            'memory_percent': psutil.virtual_memory().percent,
            'disk_percent': psutil.disk_usage('/').percent,
        },
        'requests': {
            'active': shutdown_manager.active_requests,
        }
    }

    return jsonify(report)

部署检查清单

  • ✓ 禁用调试模式和详细错误信息
  • ✓ 配置生产环境数据库和缓存
  • ✓ 设置SSL证书和HTTPS重定向
  • ✓ 配置防火墙和安全组规则
  • ✓ 设置监控和报警系统
  • ✓ 配置日志聚合和轮转
  • ✓ 实现自动化备份策略
  • ✓ 设置CI/CD管道
  • ✓ 配置负载均衡和自动扩展
  • ✓ 测试灾难恢复流程

部署工具推荐

云平台
  • AWS:EC2, ECS, EKS, Beanstalk
  • Google Cloud:Compute Engine, GKE, Cloud Run
  • Azure:App Service, AKS, Container Instances
  • DigitalOcean:Droplets, Kubernetes, App Platform
监控工具
  • Sentry:错误监控和追踪
  • Prometheus + Grafana:指标监控
  • Datadog:全栈监控
  • New Relic:应用性能监控