Introduction
Multi-cloud strategies are no longer optional—they’re a business necessity. 89% of enterprises use multi-cloud infrastructure, but managing multiple platforms creates complexity. This guide covers orchestrating infrastructure across AWS, Azure, and GCP using industry-standard tools.
Key Statistics:
- 89% of enterprises use multi-cloud
- Average savings from multi-cloud: 15-30%
- Vendor lock-in costs enterprises $150B annually
- Multi-cloud reduces downtime by 70%
Tool Comparison
| Tool | Language | State Management | Learning Curve | Enterprise Ready |
|---|---|---|---|---|
| Terraform | HCL | Remote | Medium | Excellent |
| Pulumi | Python/TS/Go | Remote | Low | Excellent |
| CloudFormation | YAML/JSON | AWS-only | Medium | Excellent |
| Crossplane | YAML | Composite | Medium | Growing |
Terraform Multi-Cloud Setup
Provider Configuration
# providers.tf
# Core Terraform settings: pinned CLI version, the three cloud providers,
# and the S3 remote-state backend.
terraform {
  required_version = ">= 1.6.0"

  required_providers {
    aws = {
      source  = "hashicorp/aws"
      version = "~> 5.0"
    }
    # BUG FIX: the azurerm entry had its source and version strings fused
    # together with a dangling quote, which is invalid HCL.
    azurerm = {
      source  = "hashicorp/azurerm"
      version = "~> 3.0"
    }
    google = {
      source  = "hashicorp/google"
      version = "~> 5.0"
    }
  }

  # Remote state lives in S3; no locking table is configured here
  # (compare backend.tf later, which adds dynamodb_table).
  backend "s3" {
    bucket = "terraform-state-bucket"
    key    = "infrastructure/terraform.tfstate"
    region = "us-east-1"
  }
}
# Per-cloud provider configuration. Credentials come from the ambient
# environment (AWS profile/role, Azure CLI or service principal, GCP ADC).
provider "aws" {
region = var.aws_region
# default_tags are merged onto every AWS resource this provider creates.
default_tags {
tags = {
Environment = var.environment
ManagedBy = "Terraform"
}
}
}
provider "azurerm" {
# The azurerm provider requires a features block, even when empty.
features {}
subscription_id = var.azure_subscription_id
tenant_id = var.azure_tenant_id
}
provider "google" {
project = var.gcp_project_id
region = var.gcp_region
}
Unified Abstraction Layer
# modules/compute/main.tf
# Provider-agnostic compute module: exactly one VM resource below is
# created, selected by var.cloud_provider ("aws", "azure", or "gcp").
variable "cloud_provider" {
description = "Cloud provider: aws, azure, gcp"
type = string
}
variable "instance_type" {
description = "Instance size"
type = string
}
variable "instance_name" {
description = "Name for the instance"
type = string
}
# NOTE(review): for Azure this module reuses vpc_id as the resource-group
# name -- confirm callers pass the RG name when cloud_provider = "azure".
variable "vpc_id" {
description = "VPC ID"
type = string
}
# NOTE(review): subnet_id is also reused as the Azure location and the
# GCP zone by the resources below -- confirm caller intent.
variable "subnet_id" {
description = "Subnet ID"
type = string
}
variable "tags" {
description = "Tags for resources"
type = map(string)
default = {}
}
locals {
# Cloud-specific mappings
# Per-cloud base-image identifiers for Ubuntu 18.04.
# NOTE(review): the AWS AMI ID is region-specific -- confirm it exists
# in the region the aws provider is configured for.
ami_ids = {
aws = "ami-0c55b159cbfafe1f0"
azure = "Canonical:UbuntuServer:18.04-LTS:latest"
gcp = "ubuntu-1804-bionic-v20230727"
}
}
# AWS implementation: created only when cloud_provider == "aws"
# (count toggles 0/1).
resource "aws_instance" "this" {
count = var.cloud_provider == "aws" ? 1 : 0
ami = local.ami_ids.aws
instance_type = var.instance_type
subnet_id = var.subnet_id
tags = merge(var.tags, { Name = var.instance_name })
}
# Azure implementation: created only when cloud_provider == "azure".
# NOTE(review): as written this resource is missing arguments azurerm
# requires (network_interface_ids, os_disk, and either admin_password or
# admin_ssh_key), and "admin" is a reserved admin_username on Azure --
# confirm before applying.
resource "azurerm_linux_virtual_machine" "this" {
count = var.cloud_provider == "azure" ? 1 : 0
name = var.instance_name
# The module reuses vpc_id as the Azure resource-group name.
resource_group_name = var.vpc_id
# NOTE(review): subnet_id is being passed as the Azure location here;
# location expects a region like "eastus" -- confirm caller intent.
location = var.subnet_id
size = var.instance_type
admin_username = "admin"
source_image_reference {
publisher = "Canonical"
offer = "UbuntuServer"
sku = "18.04-LTS"
version = "latest"
}
tags = merge(var.tags, { Name = var.instance_name })
}
# GCP implementation: created only when cloud_provider == "gcp".
resource "google_compute_instance" "this" {
  count = var.cloud_provider == "gcp" ? 1 : 0

  name         = var.instance_name
  machine_type = var.instance_type
  # NOTE(review): subnet_id is reused as the GCP zone here; zone expects
  # e.g. "us-east1-b" -- confirm caller intent.
  zone = var.subnet_id

  boot_disk {
    initialize_params {
      image = local.ami_ids.gcp
    }
  }

  network_interface {
    subnetwork = var.subnet_id
  }

  # BUG FIX: google_compute_instance "tags" are *network tags* -- a list
  # of strings -- so assigning merge(...) (a map) is a type error.
  # Key/value metadata belongs in "labels"; keys and values are lowercased
  # to satisfy GCP label constraints.
  labels = {
    for k, v in merge(var.tags, { name = var.instance_name }) :
    lower(k) => lower(v)
  }
}
# Exactly one of the three instance resources exists (the other two have
# count = 0). BUG FIX: indexing an absent resource with [0] is an
# evaluation error, so each reference is wrapped in try(); coalesce()
# then returns the single non-null value.
output "instance_id" {
  value = coalesce(
    try(aws_instance.this[0].id, null),
    try(azurerm_linux_virtual_machine.this[0].id, null),
    try(google_compute_instance.this[0].id, null)
  )
}

output "instance_ip" {
  value = coalesce(
    try(aws_instance.this[0].private_ip, null),
    try(azurerm_linux_virtual_machine.this[0].private_ip_address, null),
    try(google_compute_instance.this[0].network_interface[0].network_ip, null)
  )
}
Pulumi Multi-Cloud
Cross-Cloud Code
#!/usr/bin/env python3
"""Pulumi multi-cloud infrastructure."""
import pulumi
import pulumi_aws as aws
import pulumi_azure as azure
import pulumi_gcp as gcp
# Stack configuration (set via `pulumi config set <key> <value>`).
config = pulumi.Config()
cloud_provider = config.require("cloud_provider")
environment = config.require("environment")
# NOTE(review): vpc_cidr is read but never used below -- the CIDRs inside
# NetworkStack are hard-coded. Confirm intent.
vpc_cidr = config.require("vpc_cidr")
class NetworkStack:
    """Provider-agnostic network stack (VPC / VNet / Network plus subnets).

    Exactly one per-cloud builder runs, selected by ``provider``. After
    construction:

    * ``vpc_id`` -- AWS VPC id / GCP network id; for Azure this holds the
      *resource group name* (the closest analogue this program uses).
    * ``subnet_ids`` -- ids of the created subnets (two for AWS, one for
      Azure and GCP).

    Raises:
        ValueError: if ``provider`` is not "aws", "azure", or "gcp"
        (instead of silently producing an empty stack).
    """

    def __init__(self, provider: str, name: str):
        self.provider = provider
        self.name = name
        self.vpc_id = ""
        self.subnet_ids = []
        builders = {
            "aws": self._aws_network,
            "azure": self._azure_network,
            "gcp": self._gcp_network,
        }
        builder = builders.get(provider)
        if builder is None:
            raise ValueError(f"Unsupported cloud provider: {provider!r}")
        builder()

    def _aws_network(self):
        """Create an AWS VPC with two /24 subnets in two AZs."""
        vpc = aws.ec2.Vpc(
            f"{self.name}-vpc",
            cidr_block="10.0.0.0/16",
            enable_dns_hostnames=True,
            enable_dns_support=True,
            tags={"Name": f"{self.name}-vpc"}
        )
        subnet1 = aws.ec2.Subnet(
            f"{self.name}-subnet-1",
            vpc_id=vpc.id,
            cidr_block="10.0.1.0/24",
            availability_zone="us-east-1a",
            tags={"Name": f"{self.name}-subnet-1"}
        )
        subnet2 = aws.ec2.Subnet(
            f"{self.name}-subnet-2",
            vpc_id=vpc.id,
            cidr_block="10.0.2.0/24",
            availability_zone="us-east-1b",
            tags={"Name": f"{self.name}-subnet-2"}
        )
        self.vpc_id = vpc.id
        self.subnet_ids = [subnet1.id, subnet2.id]

    def _azure_network(self):
        """Create an Azure resource group, VNet, and one subnet."""
        rg = azure.core.ResourceGroup(
            f"{self.name}-rg",
            name=f"{self.name}-rg",
            location="eastus"
        )
        # BUG FIX: the original fused the resource name and the `name`
        # keyword into invalid syntax (f"{self.name}-vnet=f" /
        # name"{self.name}-vnet") -- a SyntaxError.
        vnet = azure.network.VirtualNetwork(
            f"{self.name}-vnet",
            name=f"{self.name}-vnet",
            resource_group_name=rg.name,
            address_spaces=["10.0.0.0/16"],
            location=rg.location
        )
        subnet1 = azure.network.Subnet(
            f"{self.name}-subnet-1",
            name=f"{self.name}-subnet-1",
            resource_group_name=rg.name,
            virtual_network_name=vnet.name,
            address_prefixes=["10.0.1.0/24"]
        )
        # Azure has no VPC id; downstream code treats the resource-group
        # name as the stack's "vpc_id".
        self.vpc_id = rg.name
        self.subnet_ids = [subnet1.id]

    def _gcp_network(self):
        """Create a GCP network (custom subnet mode) with one subnet."""
        network = gcp.compute.Network(
            f"{self.name}-network",
            name=f"{self.name}-network",
            auto_create_subnetworks=False
        )
        subnet1 = gcp.compute.Subnetwork(
            f"{self.name}-subnet-1",
            name=f"{self.name}-subnet-1",
            network=network.id,
            ip_cidr_range="10.0.1.0/24",
            region="us-east1"
        )
        self.vpc_id = network.id
        self.subnet_ids = [subnet1.id]
# Create network based on config
# Instantiate the stack for the configured cloud and surface its ids as
# Pulumi stack outputs.
network = NetworkStack(cloud_provider, environment)
pulumi.export("vpc_id", network.vpc_id)
pulumi.export("subnet_ids", network.subnet_ids)
Cloud-Native Service Abstraction
Database Abstraction
# modules/database/main.tf
# Provider-agnostic database module (AWS RDS or Azure PostgreSQL),
# selected by var.cloud_provider.
variable "cloud_provider" {
type = string
default = "aws"
}
variable "database_name" {
type = string
}
variable "instance_class" {
type = string
}
# NOTE(review): multi_az is declared but not referenced by the resources
# below -- confirm intent.
variable "multi_az" {
type = bool
default = false
}
# Storage in GB; the Azure resource converts this to MB.
variable "allocated_storage" {
type = number
default = 20
}
variable "engine" {
type = string
default = "postgres"
}
variable "engine_version" {
type = string
default = "15"
}
# For Azure this module reuses vpc_id as the resource-group name.
variable "vpc_id" {
type = string
}
variable "subnet_ids" {
type = list(string)
}
# AWS implementation: an RDS (Multi-AZ DB) cluster, created only for
# cloud_provider == "aws" with the default "postgres" engine.
resource "aws_rds_cluster" "this" {
  count = var.cloud_provider == "aws" && var.engine == "postgres" ? 1 : 0

  cluster_identifier = var.database_name
  engine             = var.engine
  engine_version     = var.engine_version
  database_name      = "maindb"
  master_username    = "admin"
  master_password    = random_password.db_password.result

  # BUG FIX: aws_rds_cluster has no "instance_class" argument; for a
  # Multi-AZ DB cluster the equivalent is db_cluster_instance_class
  # (allocated_storage is valid in that mode).
  # NOTE(review): Multi-AZ DB clusters also require storage_type and
  # iops -- confirm against the provider docs before applying.
  db_cluster_instance_class = var.instance_class
  allocated_storage         = var.allocated_storage

  backup_retention_period = 7
  preferred_backup_window = "03:00-04:00"

  # NOTE(review): aws_security_group.rds is defined elsewhere in this
  # module (not shown in this excerpt).
  vpc_security_group_ids = [aws_security_group.rds[0].id]
  db_subnet_group_name   = aws_db_subnet_group.this[0].name

  tags = { Name = var.database_name }
}
# Subnet group placing the RDS cluster into the module's subnets
# (AWS only).
resource "aws_db_subnet_group" "this" {
count = var.cloud_provider == "aws" ? 1 : 0
name = var.database_name
subnet_ids = var.subnet_ids
tags = { Name = var.database_name }
}
# Azure implementation: a PostgreSQL Flexible Server, created only for
# cloud_provider == "azure".
resource "azurerm_postgresql_flexible_server" "this" {
  count = var.cloud_provider == "azure" ? 1 : 0

  name = var.database_name
  # This module reuses vpc_id as the Azure resource-group name.
  resource_group_name = var.vpc_id
  # NOTE(review): location is hard-coded here while other settings are
  # variables -- consider parameterizing.
  location = "eastus"

  sku_name   = var.instance_class
  storage_mb = var.allocated_storage * 1024

  # BUG FIX: "admin" is a disallowed administrator_login on Azure
  # PostgreSQL (reserved name); use a non-reserved login instead.
  administrator_login    = "psqladmin"
  administrator_password = random_password.db_password.result
  version                = var.engine_version

  # BUG FIX: the resource has no "subnet_name" argument; VNet integration
  # uses delegated_subnet_id. NOTE(review): a delegated subnet normally
  # also requires private_dns_zone_id -- confirm before applying.
  delegated_subnet_id = var.subnet_ids[0]

  tags = { Name = var.database_name }
}
# Shared admin password for whichever database resource is created.
# NOTE(review): the generated value is stored in plain text in Terraform
# state -- consider a secrets manager for production.
resource "random_password" "db_password" {
length = 16
special = true
}
# Connection endpoint of whichever database was created: try() collapses
# the absent provider's resource reference to "" and coalesce() returns
# the non-empty value.
output "endpoint" {
value = coalesce(
try(aws_rds_cluster.this[0].endpoint, ""),
try(azurerm_postgresql_flexible_server.this[0].fqdn, "")
)
}
State Management
Remote State with Locking
# backend.tf
# Remote state in S3 with server-side encryption; the DynamoDB table
# provides state locking so concurrent applies cannot corrupt state.
terraform {
backend "s3" {
bucket = "terraform-state-prod"
key = "global/s3/terraform.tfstate"
region = "us-east-1"
encrypt = true
dynamodb_table = "terraform-state-lock"
}
}
# Pulumi state with Azure Blob
# NOTE(review): Pulumi's state backend is normally selected with the CLI
# (`pulumi login azblob://<container>`), not from program code;
# pulumi.runtime.set_config only sets a stack config value here --
# confirm this snippet's intent.
import pulumi
pulumi.runtime.set_config("azure:location", "eastus")
# State is automatically managed
# No additional configuration needed for most cases
State Import and Migration
# Import existing AWS resource to Terraform
terraform import aws_instance.existing i-1234567890abcdef0

# Migrate state between backends: first edit the backend block in the
# configuration (e.g. change `backend "s3"` to `backend "consul"`), then
# re-init. BUG FIX: terraform init has no -from-backend/-to-backend
# flags; -migrate-state copies existing state into the newly configured
# backend.
terraform init -migrate-state

# State list and show
terraform state list
terraform state show aws_instance.example
GitOps with Multi-Cloud
Terragrunt Configuration
# terragrunt.hcl
# Hooks run around the wrapped terraform commands.
terraform {
# Lint the module with tflint before both plan and apply.
before_hook "validate" {
commands = ["plan", "apply"]
execute = ["tflint"]
}
after_hook "plan" {
commands = ["plan"]
execute = ["echo", "Plan completed"]
}
}
# inputs are forwarded to the wrapped module as TF_VAR_* variables.
inputs = {
environment = "production"
tags = {
Project = "multi-cloud"
Environment = "production"
}
}
# Environment-specific overrides.
# BUG FIX: terragrunt's "generate" is a labeled block, not an attribute;
# `generate = { ... }` is invalid terragrunt.hcl.
generate "provider" {
  path      = "providers.tf"
  if_exists = "overwrite_terragrunt"
  contents  = <<EOF
provider "aws" {
  region = "us-east-1"
  default_tags {
    tags = {
      Environment = "production"
      Project     = "multi-cloud"
    }
  }
}
EOF
}
# Run in specific environment
# run-all walks every module under the working dir and runs the command
# in dependency order.
terragrunt run-all plan --terragrunt-working-dir environments/production/
terragrunt run-all apply --terragrunt-working-dir environments/production/
Cost Optimization
Right-Sizing Resources
#!/usr/bin/env python3
"""Cost optimization through right-sizing analysis."""
import boto3
from datetime import datetime, timedelta
ce = boto3.client('ce')  # Cost Explorer client, reused by functions below
ec2 = boto3.client('ec2')  # EC2 client for instance inventory
def get_underutilized_instances():
    """Find running EC2 instances averaging under 20% CPU over 30 days.

    Returns:
        list[dict]: one entry per underutilized instance with keys
        ``instance_id``, ``instance_type``, ``avg_cpu``, ``recommendation``.
    """
    # BUG FIX: GetMetricStatistics is a CloudWatch API; the original
    # called it on the EC2 client, which has no such method.
    cloudwatch = boto3.client('cloudwatch')

    # Look back over the last 30 days.
    end_date = datetime.now()
    start_date = end_date - timedelta(days=30)

    # Cost data is fetched for context but not yet folded into the
    # recommendation logic. TODO(review): use or drop this call.
    ce.get_cost_and_usage(
        TimePeriod={
            'Start': start_date.strftime('%Y-%m-%d'),
            'End': end_date.strftime('%Y-%m-%d')
        },
        Granularity='DAILY',
        Metrics=['UnblendedCost', 'UsageQuantity'],
        GroupBy=[
            {'Type': 'DIMENSION', 'Key': 'INSTANCE_TYPE'}
        ]
    )

    recommendations = []
    # Paginate so accounts with many instances are fully covered
    # (describe_instances alone returns at most one page).
    paginator = ec2.get_paginator('describe_instances')
    pages = paginator.paginate(
        Filters=[
            {'Name': 'instance-state-name', 'Values': ['running']}
        ]
    )
    for page in pages:
        for reservation in page['Reservations']:
            for instance in reservation['Instances']:
                # BUG FIX: scope the metric to this instance via
                # Dimensions; the original queried AWS/EC2 CPU with no
                # dimension, so every instance saw the same data.
                metrics = cloudwatch.get_metric_statistics(
                    Namespace='AWS/EC2',
                    MetricName='CPUUtilization',
                    Dimensions=[{'Name': 'InstanceId',
                                 'Value': instance['InstanceId']}],
                    StartTime=start_date,
                    EndTime=end_date,
                    Period=86400,  # one datapoint per day
                    Statistics=['Average']
                )
                datapoints = metrics['Datapoints']
                avg_cpu = (
                    sum(m['Average'] for m in datapoints) / len(datapoints)
                    if datapoints else 0
                )
                if avg_cpu < 20:
                    recommendations.append({
                        'instance_id': instance['InstanceId'],
                        'instance_type': instance['InstanceType'],
                        'avg_cpu': avg_cpu,
                        'recommendation': 'Downsize to smaller instance'
                    })
    return recommendations
if __name__ == '__main__':
    # Print one recommendation line per underutilized instance.
    for rec in get_underutilized_instances():
        summary = (
            f"{rec['instance_id']}: {rec['instance_type']} "
            f"at {rec['avg_cpu']:.1f}% CPU - {rec['recommendation']}"
        )
        print(summary)
Comments