Introduction
Serverless containers combine container portability with serverless simplicity. This article compares AWS Fargate, Google Cloud Run, and Knative for serverless container deployments.
Key statistics (indicative):
- Serverless container market: $2B+
- Cloud Run: 1M+ containers per second across the platform
- Fargate: 100K+ concurrently running tasks
- Cold start: ~50-200 ms on Cloud Run vs. ~30-60 s on Fargate
Platform Comparison
AWS Fargate
- Integration: ECS, EKS
- Pricing: per vCPU-hour + per GB-hour
- Min scale: 0
- Max scale: limited by account quotas (configurable)
- Cold start: ~30-60 seconds
- Networking: VPC integration

Google Cloud Run
- Integration: Cloud Build, GKE
- Pricing: per request + instance time
- Min scale: 0
- Max scale: 1000 instances (configurable)
- Cold start: ~50-200 ms
- Networking: global load balancing

Knative
- Integration: any Kubernetes cluster
- Pricing: underlying cluster + networking
- Min scale: 0 (set to 1 to avoid cold-start latency)
- Max scale: unlimited (bounded by cluster capacity)
- Cold start: depends on cluster
- Networking: Istio/Contour ingress

At a glance
- Most managed: Cloud Run > Fargate > Knative
- Most portable: Knative > Cloud Run > Fargate
- Typical cost, highest first: Fargate > Cloud Run > Knative (self-managed)
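The rankings above can be condensed into a first-pass decision rule. This is an illustrative heuristic, not an official sizing guide; the function name and inputs are this article's own simplification:

```python
def suggest_platform(needs_portability: bool, on_aws: bool,
                     latency_sensitive: bool) -> str:
    """Illustrative first-pass platform choice based on the comparison above."""
    if needs_portability:
        return "Knative"       # runs on any Kubernetes cluster
    if latency_sensitive:
        return "Cloud Run"     # ~50-200 ms cold starts
    return "Fargate" if on_aws else "Cloud Run"
```

Real decisions also weigh VPC requirements, existing tooling, and team Kubernetes experience, but this captures the headline trade-offs.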
AWS Fargate Implementation
#!/usr/bin/env python3
"""AWS Fargate task management."""
import boto3
from dataclasses import dataclass
from typing import Dict, List, Optional


@dataclass
class FargateTask:
    """Fargate task metadata."""
    task_definition_arn: str
    cluster_arn: str
    task_arn: str
    status: str
    started_at: str


class FargateManager:
    """Manage AWS Fargate tasks."""

    def __init__(self, region: str = "us-east-1"):
        self.region = region
        self.ecs = boto3.client('ecs', region_name=region)

    def register_task_definition(self, task_name: str,
                                 image_uri: str,
                                 cpu: int = 256,
                                 memory: int = 512,
                                 environment: Optional[Dict] = None) -> str:
        """Register a Fargate task definition and return its ARN."""
        container_def = {
            'name': task_name,
            'image': image_uri,
            'cpu': cpu,
            'memory': memory,
            'essential': True,
            'portMappings': [
                {'containerPort': 8080, 'protocol': 'tcp'}
            ],
            'logConfiguration': {
                'logDriver': 'awslogs',
                'options': {
                    'awslogs-group': f'/ecs/{task_name}',
                    'awslogs-region': self.region,
                    'awslogs-stream-prefix': 'ecs'
                }
            }
        }
        if environment:
            container_def['environment'] = [
                {'name': k, 'value': v}
                for k, v in environment.items()
            ]
        response = self.ecs.register_task_definition(
            family=task_name,
            networkMode='awsvpc',
            requiresCompatibilities=['FARGATE'],
            cpu=str(cpu),
            memory=str(memory),
            containerDefinitions=[container_def]
        )
        return response['taskDefinition']['taskDefinitionArn']

    def run_task(self, cluster: str, task_name: str,
                 subnets: List[str], security_groups: List[str],
                 count: int = 1) -> List[FargateTask]:
        """Run one or more Fargate tasks."""
        response = self.ecs.run_task(
            cluster=cluster,
            taskDefinition=task_name,
            launchType='FARGATE',
            networkConfiguration={
                'awsvpcConfiguration': {
                    'subnets': subnets,
                    'securityGroups': security_groups,
                    'assignPublicIp': 'DISABLED'
                }
            },
            count=count  # run_task takes 'count'; 'desiredCount' belongs to services
        )
        return [
            FargateTask(
                task_definition_arn=task['taskDefinitionArn'],
                cluster_arn=task['clusterArn'],
                task_arn=task['taskArn'],  # the API returns an ARN, not a bare task ID
                status=task['lastStatus'],
                # startedAt is absent while the task is still PROVISIONING
                started_at=str(task.get('startedAt', ''))
            )
            for task in response['tasks']
        ]

    def create_service(self, cluster: str, service_name: str,
                       task_def: str, subnets: List[str],
                       min_capacity: int = 0,
                       max_capacity: int = 10) -> str:
        """Create a Fargate service with target-tracking auto-scaling."""
        response = self.ecs.create_service(
            cluster=cluster,
            serviceName=service_name,
            taskDefinition=task_def,
            launchType='FARGATE',
            desiredCount=min_capacity,
            networkConfiguration={
                'awsvpcConfiguration': {
                    'subnets': subnets,
                    'assignPublicIp': 'DISABLED'
                }
            },
            enableExecuteCommand=True
        )
        self._setup_auto_scaling(
            cluster, service_name, min_capacity, max_capacity
        )
        return response['service']['serviceArn']

    def _setup_auto_scaling(self, cluster: str, service: str,
                            min_capacity: int, max_capacity: int):
        """Register the service with Application Auto Scaling."""
        appautoscaling = boto3.client('application-autoscaling',
                                      region_name=self.region)
        appautoscaling.register_scalable_target(
            ServiceNamespace='ecs',
            ResourceId=f"service/{cluster}/{service}",
            ScalableDimension='ecs:service:DesiredCount',
            MinCapacity=min_capacity,
            MaxCapacity=max_capacity
        )
        appautoscaling.put_scaling_policy(
            PolicyName=f'{service}-cpu-scaling',
            ServiceNamespace='ecs',
            ResourceId=f"service/{cluster}/{service}",
            ScalableDimension='ecs:service:DesiredCount',
            PolicyType='TargetTrackingScaling',
            TargetTrackingScalingPolicyConfiguration={
                'PredefinedMetricSpecification': {
                    'PredefinedMetricType': 'ECSServiceAverageCPUUtilization'
                },
                'TargetValue': 70.0,
                'ScaleInCooldown': 60,
                'ScaleOutCooldown': 60
            }
        )
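Since the ECS API identifies tasks by full ARN, logging and dashboards often want just the short task ID at the end. A minimal helper, assuming the standard `.../task/<cluster>/<task-id>` ARN layout:

```python
def task_id_from_arn(task_arn: str) -> str:
    """Return the final path component of an ECS task ARN."""
    return task_arn.rsplit('/', 1)[-1]


print(task_id_from_arn(
    "arn:aws:ecs:us-east-1:123456789012:task/my-cluster/0abc123def"
))  # → 0abc123def
```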
Google Cloud Run Implementation
# Cloud Run service configuration
apiVersion: serving.knative.dev/v1
kind: Service
metadata:
  name: my-service
  namespace: default
spec:
  template:
    metadata:
      annotations:
        # Auto-scaling bounds
        autoscaling.knative.dev/minScale: "0"
        autoscaling.knative.dev/maxScale: "100"
        # Throttle CPU outside of request processing (the default)
        run.googleapis.com/cpu-throttling: "true"
    spec:
      containerConcurrency: 80  # max concurrent requests per instance
      timeoutSeconds: 300
      serviceAccountName: service-account
      containers:
      - image: gcr.io/project/image:latest
        ports:
        - containerPort: 8080
        resources:
          # Fully managed Cloud Run honors limits only (no requests block)
          limits:
            cpu: 1000m
            memory: 512Mi
        env:
        - name: PORT
          value: "8080"
        - name: MAX_CONNECTIONS
          value: "100"
        volumeMounts:
        - name: secret
          mountPath: /secrets
          readOnly: true
        livenessProbe:
          httpGet:
            path: /health
          initialDelaySeconds: 10
          periodSeconds: 5
        # Cloud Run supports startup probes rather than readiness probes
        startupProbe:
          httpGet:
            path: /ready
          initialDelaySeconds: 5
          periodSeconds: 3
      volumes:
      - name: secret
        secret:
          secretName: api-keys
---
# Cloud Run with GPU (availability is region- and quota-limited)
apiVersion: serving.knative.dev/v1
kind: Service
metadata:
  name: ml-service
spec:
  template:
    metadata:
      annotations:
        run.googleapis.com/accelerator: nvidia-l4
    spec:
      containers:
      - image: gcr.io/ml-project/gpu-service:latest
        resources:
          limits:
            nvidia.com/gpu: "1"
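The `containerConcurrency` setting above drives how many instances a given traffic level needs. Little's law gives a quick estimate: in-flight requests equal arrival rate times latency, and each instance absorbs `containerConcurrency` of them. A back-of-envelope sketch (function name and simplifying assumptions are this article's own):

```python
import math


def estimated_instances(rps: float, avg_latency_s: float,
                        concurrency: int) -> int:
    """Estimate instance count: in-flight requests / per-instance concurrency."""
    in_flight = rps * avg_latency_s  # Little's law: L = lambda * W
    return max(1, math.ceil(in_flight / concurrency))


print(estimated_instances(rps=1000, avg_latency_s=0.2, concurrency=80))  # → 3
```

The real autoscaler also reacts to CPU and traffic burstiness, so treat this as a floor when choosing `maxScale`.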
#!/usr/bin/env python3
"""Google Cloud Run management."""
from typing import Dict, Optional

from google.cloud import run_v2


class CloudRunManager:
    """Manage Cloud Run services."""

    def __init__(self, project_id: str, region: str = "us-central1"):
        self.project_id = project_id
        self.region = region
        self.client = run_v2.ServicesClient()

    def create_service(self, service_name: str,
                       image: str,
                       config: Optional[Dict] = None) -> str:
        """Create a Cloud Run service and return its full resource name."""
        config = config or {}  # avoid AttributeError when no config is given
        parent = f"projects/{self.project_id}/locations/{self.region}"
        service = run_v2.Service(
            template=run_v2.RevisionTemplate(
                containers=[
                    run_v2.Container(
                        image=image,
                        ports=[run_v2.ContainerPort(
                            container_port=config.get('port', 8080)
                        )],
                        resources=run_v2.ResourceRequirements(
                            limits={
                                'cpu': config.get('cpu', '1000m'),
                                'memory': config.get('memory', '512Mi')
                            }
                        )
                    )
                ],
                scaling=run_v2.RevisionScaling(
                    min_instance_count=config.get('min_instances', 0),
                    max_instance_count=config.get('max_instances', 100)
                )
            )
        )
        operation = self.client.create_service(
            parent=parent,
            service=service,
            service_id=service_name
        )
        result = operation.result()  # blocks until the deployment completes
        return result.name

    def update_service(self, service_name: str, image: str):
        """Roll out a new image to an existing service."""
        full_name = (f"projects/{self.project_id}/locations/"
                     f"{self.region}/services/{service_name}")
        service = run_v2.Service(
            name=full_name,  # update_service identifies the service by its name field
            template=run_v2.RevisionTemplate(
                containers=[run_v2.Container(image=image)]
            )
        )
        operation = self.client.update_service(service=service)
        return operation.result()
Knative Implementation
# Knative Service
apiVersion: serving.knative.dev/v1
kind: Service
metadata:
  name: knative-service
spec:
  template:
    metadata:
      name: knative-service-v1
      annotations:
        # Scale bounds are set via annotations, not a spec-level scaleConfig block
        autoscaling.knative.dev/minScale: "0"
        autoscaling.knative.dev/maxScale: "100"
    spec:
      containers:
      - image: gcr.io/knative-samples/helloworld-go
        env:
        - name: TARGET
          value: "Knative"
---
# Knative with tuned autoscaling (KPA, the default autoscaler)
apiVersion: serving.knative.dev/v1
kind: Service
metadata:
  name: kpa-service
spec:
  template:
    metadata:
      annotations:
        autoscaling.knative.dev/class: kpa.autoscaling.knative.dev
        # Scale on concurrency, targeting 10 in-flight requests per pod
        autoscaling.knative.dev/metric: concurrency
        autoscaling.knative.dev/target: "10"
        autoscaling.knative.dev/minScale: "0"
        autoscaling.knative.dev/maxScale: "10"
        # Average metrics over 30s; delay scale-down by 5 minutes
        autoscaling.knative.dev/window: "30s"
        autoscaling.knative.dev/scale-down-delay: "5m"
    spec:
      containerConcurrency: 100  # hard per-pod concurrency limit
      containers:
      - image: my-service:latest
---
# Knative with GPU (requires GPU nodes; enabling the
# kubernetes.podspec-nodeselector feature flag for nodeSelector)
apiVersion: serving.knative.dev/v1
kind: Service
metadata:
  name: ml-service
spec:
  template:
    spec:
      nodeSelector:
        cloud.google.com/gke-accelerator: nvidia-tesla-t4
      containers:
      - image: ml-service:latest
        resources:
          limits:
            nvidia.com/gpu: "1"
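Whether to keep minScale at 0 or pin it to 1 depends on how often traffic gaps outlast the scale-to-zero window. A crude model, assuming Poisson arrivals (an illustrative simplification, not a Knative formula): the chance a request arrives after the service has gone cold is the chance the preceding gap exceeded the idle window.

```python
import math


def cold_start_fraction(req_per_min: float,
                        scale_to_zero_after_min: float) -> float:
    """Fraction of requests that hit a cold service, assuming Poisson
    arrivals: P(gap > window) = exp(-rate * window)."""
    return math.exp(-req_per_min * scale_to_zero_after_min)


# One request per minute, 5-minute idle window: under 1% of requests are cold
print(cold_start_fraction(req_per_min=1, scale_to_zero_after_min=5))
```

If this fraction times your cold-start cost is unacceptable, set minScale to 1 and pay for one always-on pod instead.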
#!/usr/bin/env python3
"""Knative deployment with Python."""
from kubernetes import client, config


class KnativeManager:
    """Manage Knative services via the Kubernetes custom-objects API."""

    def __init__(self):
        try:
            config.load_incluster_config()
        except config.ConfigException:
            config.load_kube_config()  # fall back to local kubeconfig
        self.api = client.CustomObjectsApi()

    def create_service(self, name: str, image: str,
                       min_scale: int = 0,
                       max_scale: int = 10) -> dict:
        """Create a Knative service."""
        service_manifest = {
            "apiVersion": "serving.knative.dev/v1",
            "kind": "Service",
            "metadata": {"name": name},
            "spec": {
                "template": {
                    "metadata": {
                        "annotations": {
                            "autoscaling.knative.dev/minScale": str(min_scale),
                            "autoscaling.knative.dev/maxScale": str(max_scale)
                        }
                    },
                    "spec": {
                        "containers": [
                            {
                                "image": image,
                                "ports": [{"containerPort": 8080}]
                            }
                        ]
                    }
                }
            }
        }
        return self.api.create_namespaced_custom_object(
            group="serving.knative.dev",
            version="v1",
            namespace="default",
            plural="services",
            body=service_manifest
        )
Cost Comparison
#!/usr/bin/env python3
"""Calculate serverless container costs."""
from dataclasses import dataclass
from typing import Dict


@dataclass
class CostEstimate:
    """Monthly cost estimate for a serverless container workload."""
    provider: str
    compute_cost_monthly: float
    request_cost_monthly: float
    total_monthly: float


class CostCalculator:
    """Calculate serverless container costs.

    Prices are illustrative (USD); check current provider pricing pages.
    """

    PRICES = {
        'fargate': {
            'vCPU_hour': 0.04048,
            'GB_hour': 0.004445,
        },
        'cloud_run': {
            'vCPU_second': 0.000024,   # $0.024 per 1000 vCPU-seconds
            'GB_second': 0.0000025,
            'request': 0.0000004,      # $0.40 per million requests
        },
        # Knative is self-managed: cost is whatever the cluster costs
    }

    def calculate_fargate(self, vcpu: float, memory_gb: float,
                          hours_per_month: float = 730) -> CostEstimate:
        """Calculate Fargate costs (billed per vCPU-hour and GB-hour)."""
        prices = self.PRICES['fargate']
        vcpu_cost = vcpu * prices['vCPU_hour'] * hours_per_month
        memory_cost = memory_gb * prices['GB_hour'] * hours_per_month
        return CostEstimate(
            provider='AWS Fargate',
            compute_cost_monthly=vcpu_cost + memory_cost,
            request_cost_monthly=0,
            total_monthly=vcpu_cost + memory_cost
        )

    def calculate_cloud_run(self, vcpu: float, memory_gb: float,
                            requests_per_month: int,
                            avg_duration_ms: float = 100,
                            instance_hours: float = 0) -> CostEstimate:
        """Calculate Cloud Run costs (billed per vCPU-second and GB-second)."""
        prices = self.PRICES['cloud_run']
        # Always-on instances (min-instances > 0) are billed for idle time too
        instance_cost = instance_hours * 3600 * (
            vcpu * prices['vCPU_second'] + memory_gb * prices['GB_second']
        )
        # Request-driven compute time
        busy_seconds = (avg_duration_ms / 1000) * requests_per_month
        vcpu_cost = vcpu * busy_seconds * prices['vCPU_second']
        memory_cost = memory_gb * busy_seconds * prices['GB_second']
        request_cost = requests_per_month * prices['request']
        total = instance_cost + vcpu_cost + memory_cost + request_cost
        return CostEstimate(
            provider='Google Cloud Run',
            compute_cost_monthly=vcpu_cost + memory_cost + instance_cost,
            request_cost_monthly=request_cost,
            total_monthly=total
        )

    def compare_providers(self, config: Dict) -> Dict:
        """Compare costs across providers for one workload profile."""
        results = {}
        results['fargate'] = self.calculate_fargate(
            config['vcpu'], config['memory_gb'], config['hours']
        )
        results['cloud_run'] = self.calculate_cloud_run(
            config['vcpu'], config['memory_gb'],
            config['requests'], config['duration_ms'],
            0 if config.get('scale_to_zero', True) else config['hours']
        )
        return results
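A useful output of this kind of calculation is the break-even point: the utilization above which an always-on Fargate task becomes cheaper than pay-per-use Cloud Run. A back-of-envelope sketch with illustrative per-unit prices (constants below are this article's examples, not authoritative quotes):

```python
# Illustrative list prices (USD); verify against current provider pricing
FARGATE_VCPU_HOUR = 0.04048
FARGATE_GB_HOUR = 0.004445
RUN_VCPU_SECOND = 0.000024
RUN_GB_SECOND = 0.0000025


def breakeven_busy_fraction(vcpu: float = 1.0, memory_gb: float = 2.0) -> float:
    """Busy fraction above which always-on Fargate is cheaper than
    pay-per-use Cloud Run for the same vCPU/memory shape."""
    fargate_per_hour = vcpu * FARGATE_VCPU_HOUR + memory_gb * FARGATE_GB_HOUR
    cloud_run_per_busy_hour = 3600 * (vcpu * RUN_VCPU_SECOND +
                                      memory_gb * RUN_GB_SECOND)
    return fargate_per_hour / cloud_run_per_busy_hour


print(f"{breakeven_busy_fraction():.0%}")
```

With these example prices, a 1 vCPU / 2 GB workload busy less than roughly half the time favors Cloud Run's pay-per-use model; steadier traffic favors Fargate.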
Related Articles
- Edge Computing: Cloudflare Workers, AWS Lambda@Edge
- Kubernetes Cost Optimization
- Serverless Cost Traps: Lambda and DynamoDB