Docker Development to Production Operations - Practical Guide for DevOps Teams
Target Audience
- DevOps Engineers: Formulating and implementing containerization strategies
- Infrastructure Engineers: Building and operating production environments
- Development Team Leads: Improving development efficiency and optimizing workflows
- SRE Engineers: Enhancing reliability and performance
Key Points
- Zero-Downtime Deployment: Production updates without service interruption using Blue-Green/Rolling Deployment
- Scalable Architecture: Auto-scaling environment with Docker Swarm/Kubernetes
- Enhanced Security: Multi-stage builds, non-root user execution, secrets management
- Comprehensive Monitoring: Log aggregation, metrics collection, integrated health checks
🏗️ Architecture Design
1. Multi-Stage Development Environment
Production-ready Dockerfile implementation example:
# === Base Image ===
# Minimal runtime base shared by every stage: Node 18 on Alpine plus dumb-init,
# which the production stage uses as its ENTRYPOINT.
FROM node:18-alpine AS base
WORKDIR /app
# `--no-cache` already skips the local apk index, so no manual cache cleanup
# is required afterwards.
RUN apk add --no-cache dumb-init

# === Toolchain Stage ===
# Build tools (presumably for compiling native npm add-ons during `npm ci`).
# They live in their own stage so the production image, which derives from
# `base`, does not ship a compiler.
FROM base AS toolchain
RUN apk add --no-cache \
    python3 \
    make \
    g++

# === Dependencies Stage ===
# Production-only node_modules, later copied into the production stage.
FROM toolchain AS dependencies
COPY package*.json ./
# `--omit=dev` replaces the deprecated `--only=production` flag.
RUN npm ci --omit=dev --no-audit --no-fund \
    && npm cache clean --force

# === Development Stage ===
# Full dependency set plus the source tree; runs the dev server.
FROM toolchain AS development
COPY package*.json ./
RUN npm ci --no-audit --no-fund
COPY . .
EXPOSE 3000
CMD ["npm", "run", "dev"]

# === Build Stage ===
# Installs dev dependencies and produces the build output copied below.
FROM toolchain AS build
COPY package*.json ./
RUN npm ci --no-audit --no-fund
COPY . .
RUN npm run build

# === Production Stage ===
# Starts from the slim `base` image: no compilers, no dev dependencies.
FROM base AS production
# Create the unprivileged user and make it a member of the group created
# alongside it (without -G the group would be unused).
RUN addgroup -g 1001 -S nodejs \
    && adduser -S nextjs -u 1001 -G nodejs
COPY --from=dependencies /app/node_modules ./node_modules
COPY --from=build /app/.next ./.next
COPY --from=build /app/public ./public
COPY --from=build /app/package.json ./package.json
USER nextjs
EXPOSE 3000
# dumb-init runs as PID 1 and launches the CMD as its child.
ENTRYPOINT ["dumb-init", "--"]
CMD ["npm", "start"]
2. Environment-Specific Configuration Management
docker-compose.yml (Development Environment)
# Development stack: the app (built from the `development` Dockerfile target)
# plus PostgreSQL and Redis, all on one bridge network.
version: '3.8'

services:
  app:
    build:
      context: .
      target: development
    volumes:
      # Live-mount the source tree for hot reload...
      - .:/app
      # ...while an anonymous volume keeps the image's node_modules from being
      # hidden by the bind mount above.
      - /app/node_modules
    ports:
      - "3000:3000"
    environment:
      - NODE_ENV=development
      - DATABASE_URL=postgresql://dev_user:dev_pass@db:5432/dev_db
      # The production file passes REDIS_URL to the app; point the dev app at
      # the redis service defined below so that service is actually used.
      - REDIS_URL=redis://redis:6379
    depends_on:
      db:
        condition: service_healthy
      redis:
        condition: service_healthy
    networks:
      - app-network

  db:
    image: postgres:15-alpine
    environment:
      POSTGRES_USER: dev_user
      POSTGRES_PASSWORD: dev_pass
      POSTGRES_DB: dev_db
    volumes:
      - postgres_data:/var/lib/postgresql/data
    ports:
      - "5432:5432"
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U dev_user -d dev_db"]
      interval: 30s
      timeout: 10s
      retries: 3
    networks:
      - app-network

  redis:
    image: redis:7-alpine
    ports:
      - "6379:6379"
    healthcheck:
      test: ["CMD", "redis-cli", "ping"]
      interval: 30s
      timeout: 10s
      retries: 3
    networks:
      - app-network

volumes:
  postgres_data:

networks:
  app-network:
    driver: bridge
docker-compose.prod.yml (Production Environment)
# Production stack: three replicas of the app behind Traefik, which terminates
# TLS with Let's Encrypt certificates.
version: '3.8'

services:
  app:
    build:
      context: .
      target: production
    restart: unless-stopped
    environment:
      - NODE_ENV=production
      # Compose interpolates ${VAR} from the shell environment or an .env file.
      # The GitHub Actions form `${{ secrets.X }}` is not valid here.
      # `:?` aborts `docker compose up` with a clear message if a value is missing.
      - DATABASE_URL=${DATABASE_URL:?DATABASE_URL must be set}
      - REDIS_URL=${REDIS_URL:?REDIS_URL must be set}
    deploy:
      replicas: 3
      update_config:
        parallelism: 1
        delay: 10s
        # Start the replacement before stopping the old task.
        order: start-first
      restart_policy:
        condition: on-failure
        delay: 5s
        max_attempts: 3
    healthcheck:
      # The Alpine-based Node image ships BusyBox wget but not curl, so a
      # curl-based probe would always fail. 127.0.0.1 avoids resolving
      # localhost to an IPv6 address the app may not listen on.
      test: ["CMD", "wget", "-q", "--spider", "http://127.0.0.1:3000/health"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 40s
    networks:
      - app-network
    labels:
      - "traefik.enable=true"
      - "traefik.http.routers.app.rule=Host(`example.com`)"
      - "traefik.http.routers.app.tls=true"
      - "traefik.http.routers.app.tls.certresolver=letsencrypt"

  traefik:
    image: traefik:v2.10
    command:
      - "--api.dashboard=true"
      - "--providers.docker=true"
      # Only containers labelled traefik.enable=true are routed.
      - "--providers.docker.exposedbydefault=false"
      - "--entrypoints.web.address=:80"
      - "--entrypoints.websecure.address=:443"
      - "--certificatesresolvers.letsencrypt.acme.tlschallenge=true"
      - "--certificatesresolvers.letsencrypt.acme.email=admin@example.com"
      - "--certificatesresolvers.letsencrypt.acme.storage=/letsencrypt/acme.json"
    ports:
      - "80:80"
      - "443:443"
    volumes:
      # Read-only Docker socket for container discovery.
      - "/var/run/docker.sock:/var/run/docker.sock:ro"
      - "letsencrypt_data:/letsencrypt"
    networks:
      - app-network

volumes:
  letsencrypt_data:

networks:
  app-network:
    # Must already exist (e.g. `docker network create app-network`).
    external: true
🚀 CI/CD Pipeline Implementation
GitHub Actions Workflow
# Builds and tests the image on every push/PR to main; on main it also pushes
# the production image to GHCR and redeploys the production host over SSH.
name: Docker Production Deployment

on:
  push:
    branches: [main]
  pull_request:
    branches: [main]

env:
  REGISTRY: ghcr.io
  IMAGE_NAME: ${{ github.repository }}

jobs:
  test:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Setup Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Build test image
        uses: docker/build-push-action@v5
        with:
          context: .
          target: development
          # Load into the local Docker store so `docker run` below can use it.
          load: true
          tags: test-image
          cache-from: type=gha
          cache-to: type=gha,mode=max

      - name: Run tests
        # No bind mount: the development target already copies the sources
        # and installs dependencies under /app. Mounting the workspace over
        # /app would hide the image's node_modules and break the test run.
        run: docker run --rm test-image npm run test:ci

      - name: Security scan
        uses: docker/scout-action@v1
        with:
          command: cves
          image: test-image

  build-and-deploy:
    needs: test
    runs-on: ubuntu-latest
    if: github.ref == 'refs/heads/main'
    # GITHUB_TOKEN needs packages:write to push to ghcr.io; grant it
    # explicitly instead of relying on the repository default.
    permissions:
      contents: read
      packages: write
    steps:
      - uses: actions/checkout@v4

      # Required for the `type=gha` cache backend used by the build step.
      - name: Setup Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Login to Container Registry
        uses: docker/login-action@v3
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Extract metadata
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
          tags: |
            type=ref,event=branch
            type=sha,prefix={{branch}}-
            type=raw,value=latest,enable={{is_default_branch}}

      - name: Build and push
        uses: docker/build-push-action@v5
        with:
          context: .
          target: production
          push: true
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
          cache-from: type=gha
          cache-to: type=gha,mode=max

      - name: Deploy to production
        uses: appleboy/ssh-action@v1.0.0
        with:
          host: ${{ secrets.PROD_HOST }}
          username: ${{ secrets.PROD_USER }}
          key: ${{ secrets.PROD_SSH_KEY }}
          script: |
            cd /opt/app
            docker compose -f docker-compose.prod.yml pull
            docker compose -f docker-compose.prod.yml up -d --remove-orphans
            docker system prune -f
📊 Monitoring and Log Integration
1. Metrics Monitoring with Prometheus + Grafana
# monitoring/docker-compose.yml
# Prometheus (metrics store), Grafana (dashboards) and node-exporter (host
# metrics).
version: '3.8'

services:
  prometheus:
    image: prom/prometheus:latest
    ports:
      - "9090:9090"
    volumes:
      - ./prometheus.yml:/etc/prometheus/prometheus.yml
      - prometheus_data:/prometheus
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      - '--storage.tsdb.path=/prometheus'
      - '--web.console.libraries=/etc/prometheus/console_libraries'
      - '--web.console.templates=/etc/prometheus/consoles'

  grafana:
    image: grafana/grafana:latest
    ports:
      # Host port 3001 so it does not clash with the app on 3000.
      - "3001:3000"
    environment:
      # Compose interpolation syntax; `${{ secrets.X }}` only works inside
      # GitHub Actions. `:?` fails fast when the variable is missing.
      - GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_PASSWORD:?GRAFANA_PASSWORD must be set}
    volumes:
      - grafana_data:/var/lib/grafana
      - ./grafana/dashboards:/etc/grafana/provisioning/dashboards
      - ./grafana/datasources:/etc/grafana/provisioning/datasources

  node-exporter:
    image: prom/node-exporter:latest
    ports:
      - "9100:9100"
    volumes:
      - /proc:/host/proc:ro
      - /sys:/host/sys:ro
      - /:/rootfs:ro
    command:
      - '--path.procfs=/host/proc'
      - '--path.sysfs=/host/sys'
      # `mount-points-exclude` is the current name of the deprecated
      # `ignored-mount-points` flag. `$$` is Compose's escape for a literal `$`.
      - '--collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc|rootfs/var/lib/docker/containers|rootfs/var/lib/docker/overlay2|rootfs/run/docker/netns|rootfs/var/lib/docker/aufs)($$|/)'

volumes:
  prometheus_data:
  grafana_data:
2. Log Aggregation with ELK Stack
# logging/docker-compose.yml
# Single-node Elasticsearch with Logstash and Kibana, all pinned to 8.8.0.
version: '3.8'

services:
  elasticsearch:
    image: docker.elastic.co/elasticsearch/elasticsearch:8.8.0
    ports:
      - "9200:9200"
    volumes:
      - elasticsearch_data:/usr/share/elasticsearch/data
    environment:
      discovery.type: single-node
      # JVM heap fixed at 1 GiB (min and max).
      ES_JAVA_OPTS: "-Xms1g -Xmx1g"
      # Quoted so the container receives the string "false".
      xpack.security.enabled: "false"

  logstash:
    image: docker.elastic.co/logstash/logstash:8.8.0
    depends_on:
      - elasticsearch
    ports:
      - "5000:5000"
      - "9600:9600"
    volumes:
      # Local ./logstash/config is mounted as Logstash's pipeline directory.
      - ./logstash/config:/usr/share/logstash/pipeline

  kibana:
    image: docker.elastic.co/kibana/kibana:8.8.0
    depends_on:
      - elasticsearch
    ports:
      - "5601:5601"
    environment:
      ELASTICSEARCH_HOSTS: http://elasticsearch:9200

volumes:
  elasticsearch_data:
🔒 Security Best Practices
1. Secure Dockerfile Implementation
# Security-hardened Dockerfile
FROM node:18-alpine AS base

# Apply security updates and install dumb-init (used as the ENTRYPOINT below)
RUN apk update && apk upgrade && \
    apk add --no-cache dumb-init && \
    rm -rf /var/cache/apk/*

# Create non-privileged user
RUN addgroup -g 1001 -S appgroup && \
    adduser -S appuser -u 1001 -G appgroup

# Secure working directory setup
WORKDIR /app
RUN chown -R appuser:appgroup /app

# Install dependencies (as root)
COPY package*.json ./
# `--omit=dev` replaces the deprecated `--only=production` flag.
RUN npm ci --omit=dev --no-audit --no-fund && \
    npm cache clean --force

# Copy application files
COPY --chown=appuser:appgroup . .

# Switch to non-root user
USER appuser

# Implement health check
# Uses BusyBox wget, which ships with the Alpine base image; curl is never
# installed here, so a curl-based probe would always report unhealthy.
# 127.0.0.1 avoids resolving localhost to an IPv6 address the app may not
# listen on.
HEALTHCHECK --interval=30s --timeout=10s --start-period=30s --retries=3 \
    CMD wget -q --spider http://127.0.0.1:3000/health || exit 1

# Secure port configuration
EXPOSE 3000

# Proper entrypoint: dumb-init runs as PID 1 and launches the CMD as its child
ENTRYPOINT ["dumb-init", "--"]
CMD ["npm", "start"]
2. Docker Secrets Management
# docker-compose example for secrets management
# The app receives file paths, not secret values: each secret listed on the
# service is referenced through a *_FILE variable pointing under /run/secrets.
version: '3.8'

services:
  app:
    image: myapp:latest
    environment:
      DB_PASSWORD_FILE: /run/secrets/db_password
      API_KEY_FILE: /run/secrets/api_key
    secrets:
      - db_password
      - api_key

# Both secrets are declared external, i.e. created outside this file.
secrets:
  db_password:
    external: true
  api_key:
    external: true
3. Network Security
# Network isolation implementation
# web is attached to frontend only, db to backend only; app joins both
# networks and is the only service that shares a network with each of them.
version: '3.8'

services:
  web:
    image: nginx:alpine
    ports:
      - "80:80"
    networks:
      - frontend

  app:
    image: myapp:latest
    depends_on:
      - db
    networks:
      - frontend
      - backend

  db:
    image: postgres:15-alpine
    secrets:
      - db_password
    environment:
      # Password is read from the mounted secret file, not an inline value.
      - POSTGRES_PASSWORD_FILE=/run/secrets/db_password
    networks:
      - backend

networks:
  frontend:
    driver: bridge
  backend:
    driver: bridge
    internal: true  # Prohibit external access

secrets:
  db_password:
    external: true
🎯 Performance Optimization
1. Image Size Optimization with Multi-Stage Builds
# Optimized multi-stage build
FROM node:18-alpine AS base
RUN apk add --no-cache libc6-compat
WORKDIR /app

# Isolate dependencies: production-only node_modules for the runtime image
FROM base AS deps
COPY package.json package-lock.json ./
# `--omit=dev` replaces the deprecated `--only=production` flag.
RUN npm ci --omit=dev && npm cache clean --force

# Build-only stage: full dependency set, never shipped
FROM base AS builder
COPY package.json package-lock.json ./
RUN npm ci
COPY . .
RUN npm run build

# Production runtime (minimal)
FROM base AS runner
# Create group and user in one layer and make the user a member of the group.
RUN addgroup --system --gid 1001 nodejs \
    && adduser --system --uid 1001 --ingroup nodejs nextjs
COPY --from=deps /app/node_modules ./node_modules
COPY --from=builder /app/.next ./.next
COPY --from=builder /app/public ./public
COPY --from=builder /app/package.json ./package.json
USER nextjs
CMD ["npm", "start"]
2. Cache Strategy Implementation
#!/bin/bash
# Build cache optimization script
# Builds and pushes the image with a registry-backed Buildx cache, then prints
# the size of each layer.

# Stop at the first failing command so a failed build is never analyzed.
set -euo pipefail

# Using cache with Docker Buildx
docker buildx build \
  --cache-from type=registry,ref=myregistry/myapp:cache \
  --cache-to type=registry,ref=myregistry/myapp:cache,mode=max \
  --push \
  --tag myregistry/myapp:latest \
  .

# `--push` sends the result to the registry without loading it into the local
# image store, so fetch it before inspecting its layers.
docker pull myregistry/myapp:latest

# Analyze layer cache
docker history myregistry/myapp:latest --format "table {{.CreatedBy}}\t{{.Size}}"
🚨 Troubleshooting
1. Common Issues and Solutions
Container Startup Failures
# Launch container for debugging (interactive shell, removed on exit)
docker run -it --rm myapp:latest /bin/sh

# Check detailed logs
docker logs --details container_name

# Check health status
# `json` renders the whole health record (status, failing streak, probe log);
# the bare `{{.State.Health}}` form prints a Go struct dump in which the log
# entries appear only as pointer addresses.
docker inspect --format='{{json .State.Health}}' container_name
Out of Memory Errors
# Resource limit configuration
# Reserves 256M / 0.25 CPU for the app and caps it at 512M / 0.5 CPU.
services:
  app:
    image: myapp:latest
    deploy:
      resources:
        reservations:
          cpus: "0.25"
          memory: 256M
        limits:
          cpus: "0.5"
          memory: 512M
Disk Space Issues
# Remove unused images and containers
# (-a also removes every image not used by a container; -f skips the prompt)
docker system prune -a -f

# Check volume capacity
docker system df

# Script to automatically remove old images
# - No `table` directive, so no header row reaches the pipeline.
# - `.CreatedSince` yields relative ages ("3 weeks ago"); `.CreatedAt` is an
#   absolute timestamp and would never match the age patterns below.
# - Untagged `<none>` entries are skipped because they cannot be removed by
#   repository:tag.
# - `xargs -r` does not run `docker rmi` at all when nothing matched.
docker images --format '{{.Repository}}:{{.Tag}}\t{{.CreatedSince}}' | \
  grep -E 'weeks ago|months ago|years ago' | \
  grep -v '<none>' | \
  awk '{print $1}' | \
  xargs -r docker rmi
📈 Operations Monitoring Dashboard
Grafana Dashboard Configuration
{
  "dashboard": {
    "title": "Docker Production Monitoring",
    "panels": [
      {
        "title": "Container CPU Usage",
        "type": "graph",
        "targets": [
          {
            "expr": "rate(container_cpu_usage_seconds_total[5m]) * 100",
            "legendFormat": "{{name}}"
          }
        ]
      },
      {
        "title": "Container Memory Usage",
        "type": "graph",
        "targets": [
          {
            "expr": "container_memory_usage_bytes / 1024 / 1024",
            "legendFormat": "{{name}} (MB)"
          }
        ]
      },
      {
        "title": "Application Response Time",
        "type": "graph",
        "targets": [
          {
            "expr": "histogram_quantile(0.95, sum by (le) (rate(http_request_duration_seconds_bucket[5m])))",
            "legendFormat": "95th percentile"
          }
        ]
      }
    ]
  }
}
🔗 Related Articles
- 🐳 Claude Code Docker Complete Guide - Basic Docker integration
- ⚡ Claude Code Auto-Execution Guide - Automating permission management
- 🚀 Amazon Kiro Complete Guide - Next-generation IDE integration
📚 References
- Docker Official Documentation
- Docker Compose Production Guide
- Kubernetes Docker Integration
- OWASP Container Security
We hope this article helps your development team with Docker production operations. Through continuous improvement and monitoring, let's build a safe and efficient container environment.