Automate ECS Fargate Cluster Creation Using Terraform
ECS Fargate is a serverless compute engine for AWS ECS. You don’t need to manage EC2 instances explicitly if Fargate is chosen as the launch type. It lets you scale your application without managing instances manually.
ECS Terminologies
- Task Definitions – A text file in JSON format that describes one or more containers that form the application. It's a blueprint of the application.
- Task – Instance of Task Definition.
- Cluster – Logical grouping of resources that application needs
1. Fargate LaunchType – If Fargate is chosen then ECS manages the cluster resources.
2. EC2 LaunchType – If EC2 is chosen then the cluster will be a group of container instances that we manage.
Clone the terraform-ecs-fargate github repository.
git clone https://github.com/AjeetK/terraform-ecs-fargate.git
cd terraform-ecs-fargate
This will have all the terraform files needed to create the ECS Fargate stack along with other resources like vpc, security groups, load-balancer etc.
Once you have the repository, initialize Terraform to download the required modules, then run terraform plan to see which resources Terraform will create.
terraform init
terraform plan
Once you are okay with the output of the “terraform plan” command, you can go ahead and create the stack using the “terraform apply” command. It will ask you to confirm before creating the stack. Type “yes” and hit enter to proceed.
terraform apply
This will create the whole cluster and give you the endpoint of a load balancer on which your application will be running.
Access this endpoint in browser and you should see the nginx home page which means our deployment is successful.
Below are the required terraform files in github repository:
# provider.tf
# Specify the provider and access details
# NOTE(review): `shared_credentials_file` (singular) is deprecated in AWS provider
# v4+ in favor of `shared_credentials_files` (a list) — confirm the provider
# version pinned by this repo before upgrading.
# NOTE(review): Terraform does not expand `$HOME` in plain strings — this path is
# passed literally. The default credential chain finds ~/.aws/credentials anyway;
# verify this argument is actually doing anything.
provider "aws" {
shared_credentials_file = "$HOME/.aws/credentials"
profile = "default"
region = var.aws_region
}
# variables.tf
# Input variables for the ECS Fargate stack. Explicit `type` constraints are
# declared so bad values fail at plan time instead of mid-apply. Numeric
# variables used in arithmetic/counts (az_count, app_port, app_count) are
# typed as number; fargate_cpu/memory stay strings because the ECS task
# definition accepts them as strings.

variable "aws_region" {
  description = "The AWS region things are created in"
  type        = string
  default     = "us-west-2"
}

variable "ecs_task_execution_role_name" {
  description = "ECS task execution role name"
  type        = string
  default     = "myEcsTaskExecutionRole"
}

variable "az_count" {
  description = "Number of AZs to cover in a given region"
  type        = number
  default     = 2
}

variable "app_image" {
  description = "Docker image to run in the ECS cluster"
  type        = string
  default     = "nginx:latest"
}

variable "app_port" {
  description = "Port exposed by the docker image to redirect traffic to"
  type        = number
  default     = 80
}

variable "app_count" {
  description = "Number of docker containers to run"
  type        = number
  default     = 3
}

variable "health_check_path" {
  description = "Path the ALB uses for target health checks"
  type        = string
  default     = "/"
}

variable "fargate_cpu" {
  description = "Fargate instance CPU units to provision (1 vCPU = 1024 CPU units)"
  type        = string
  default     = "1024"
}

variable "fargate_memory" {
  description = "Fargate instance memory to provision (in MiB)"
  type        = string
  default     = "2048"
}
# network.tf

# Fetch AZs in the current region.
# Restrict the lookup to zones that are currently "available" so subnets are
# never scheduled into an impaired or unsupported zone.
data "aws_availability_zones" "available" {
  state = "available"
}
# VPC that hosts the whole stack; subnets are carved out of this /16 below
# with cidrsubnet().
resource "aws_vpc" "main" {
cidr_block = "172.17.0.0/16"
}
# Create var.az_count private subnets, each in a different AZ
# cidrsubnet(/16, 8, i) yields /24s: 172.17.0.0/24, 172.17.1.0/24, ...
# ECS tasks run here (see aws_ecs_service.main network_configuration).
resource "aws_subnet" "private" {
count = var.az_count
cidr_block = cidrsubnet(aws_vpc.main.cidr_block, 8, count.index)
availability_zone = data.aws_availability_zones.available.names[count.index]
vpc_id = aws_vpc.main.id
}
# Create var.az_count public subnets, each in a different AZ
# The netnum offset (var.az_count + count.index) keeps these /24s from
# overlapping the private subnets created above. The ALB and NAT gateways
# live here.
resource "aws_subnet" "public" {
count = var.az_count
cidr_block = cidrsubnet(aws_vpc.main.cidr_block, 8, var.az_count + count.index)
availability_zone = data.aws_availability_zones.available.names[count.index]
vpc_id = aws_vpc.main.id
map_public_ip_on_launch = true
}
# Internet Gateway for the public subnet
resource "aws_internet_gateway" "gw" {
vpc_id = aws_vpc.main.id
}
# Route the public subnet traffic through the IGW
# This adds a default route to the VPC's *main* route table; the public
# subnets use it because only the private subnets get explicit route table
# associations below.
resource "aws_route" "internet_access" {
route_table_id = aws_vpc.main.main_route_table_id
destination_cidr_block = "0.0.0.0/0"
gateway_id = aws_internet_gateway.gw.id
}
# Create a NAT gateway with an Elastic IP for each private subnet to get internet connectivity
# depends_on forces the IGW to exist first — an EIP cannot be associated in a
# VPC without an attached internet gateway.
# NOTE(review): `vpc = true` is deprecated in AWS provider v5+ in favor of
# `domain = "vpc"` — confirm the pinned provider version before changing.
resource "aws_eip" "gw" {
count = var.az_count
vpc = true
depends_on = [aws_internet_gateway.gw]
}
# One NAT gateway per AZ, placed in the matching public subnet and bound to
# the matching Elastic IP. Direct index access is equivalent to element() here
# because count.index is always < var.az_count.
resource "aws_nat_gateway" "gw" {
  count = var.az_count

  subnet_id     = aws_subnet.public[count.index].id
  allocation_id = aws_eip.gw[count.index].id
}
# Per-AZ route table for the private subnets: all non-local traffic exits
# through that AZ's NAT gateway. Direct index access replaces element();
# identical behavior since count.index < var.az_count.
resource "aws_route_table" "private" {
  count  = var.az_count
  vpc_id = aws_vpc.main.id

  route {
    cidr_block     = "0.0.0.0/0"
    nat_gateway_id = aws_nat_gateway.gw[count.index].id
  }
}
# Explicitly bind each private subnet to its per-AZ route table so it does
# not fall back to the VPC's main (internet-facing) route table.
resource "aws_route_table_association" "private" {
  count = var.az_count

  subnet_id      = aws_subnet.private[count.index].id
  route_table_id = aws_route_table.private[count.index].id
}
# security.tf
# ALB Security Group: Edit to restrict access to the application
# Allows inbound TCP on var.app_port from anywhere and all outbound traffic.
resource "aws_security_group" "lb" {
name = "myapp-load-balancer-security-group"
description = "controls access to the ALB"
vpc_id = aws_vpc.main.id
ingress {
protocol = "tcp"
from_port = var.app_port
to_port = var.app_port
cidr_blocks = ["0.0.0.0/0"]
}
# protocol "-1" with ports 0/0 means all protocols, all ports.
egress {
protocol = "-1"
from_port = 0
to_port = 0
cidr_blocks = ["0.0.0.0/0"]
}
}
# Traffic to the ECS cluster should only come from the ALB
# The ingress rule references the ALB security group instead of a CIDR, so
# only ENIs carrying that SG can reach the tasks.
resource "aws_security_group" "ecs_tasks" {
name = "myapp-ecs-tasks-security-group"
description = "allow inbound access from the ALB only"
vpc_id = aws_vpc.main.id
ingress {
protocol = "tcp"
from_port = var.app_port
to_port = var.app_port
security_groups = [aws_security_group.lb.id]
}
# Unrestricted egress so tasks can pull images / write logs via the NAT gateway.
egress {
protocol = "-1"
from_port = 0
to_port = 0
cidr_blocks = ["0.0.0.0/0"]
}
}
# alb.tf
# Internet-facing ALB in the public subnets.
# NOTE(review): `aws_alb` is a legacy alias of `aws_lb`; renaming would change
# the state address, so it is left as-is.
resource "aws_alb" "main" {
name = "myapp-load-balancer"
subnets = aws_subnet.public.*.id
security_groups = [aws_security_group.lb.id]
}
# Target group for the ECS service. target_type "ip" is required for Fargate
# tasks in awsvpc network mode (targets are task ENI IPs, not instances).
resource "aws_alb_target_group" "app" {
  name     = "myapp-target-group"
  # Use var.app_port instead of a hard-coded 80 so the target group stays
  # consistent with the listener, security groups, and ECS service when the
  # application port changes. (Identical with the default app_port = 80.)
  port        = var.app_port
  protocol    = "HTTP"
  vpc_id      = aws_vpc.main.id
  target_type = "ip"

  health_check {
    healthy_threshold   = "3"
    interval            = "30"
    protocol            = "HTTP"
    matcher             = "200"
    timeout             = "3"
    path                = var.health_check_path
    unhealthy_threshold = "2"
  }
}
# Redirect all traffic from the ALB to the target group
# Use the `.arn` attributes for ARN-typed arguments; for aws_alb and
# aws_alb_target_group `.id` happens to equal `.arn`, so this is a pure
# idiom fix with no behavior change.
resource "aws_alb_listener" "front_end" {
  load_balancer_arn = aws_alb.main.arn
  port              = var.app_port
  protocol          = "HTTP"

  default_action {
    target_group_arn = aws_alb_target_group.app.arn
    type             = "forward"
  }
}
# ecs.tf
# Logical ECS cluster; with Fargate there are no container instances to manage.
resource "aws_ecs_cluster" "main" {
name = "myapp-cluster"
}
# Render the container definitions JSON from a template, substituting the
# image, port, sizing, and region variables.
# NOTE(review): the template_file data source is deprecated; the built-in
# templatefile() function is the modern replacement — migrating requires
# updating the reference in aws_ecs_task_definition.app as well.
data "template_file" "myapp" {
template = file("./templates/ecs/myapp.json.tpl")
vars = {
app_image = var.app_image
app_port = var.app_port
fargate_cpu = var.fargate_cpu
fargate_memory = var.fargate_memory
aws_region = var.aws_region
}
}
# Fargate task definition. awsvpc network mode is mandatory for the FARGATE
# launch type; the execution role lets ECS pull images and push logs on the
# task's behalf. Container definitions come from the rendered template above.
resource "aws_ecs_task_definition" "app" {
family = "myapp-task"
execution_role_arn = aws_iam_role.ecs_task_execution_role.arn
network_mode = "awsvpc"
requires_compatibilities = ["FARGATE"]
cpu = var.fargate_cpu
memory = var.fargate_memory
container_definitions = data.template_file.myapp.rendered
}
# ECS service that keeps var.app_count Fargate tasks running behind the ALB.
resource "aws_ecs_service" "main" {
  name            = "myapp-service"
  cluster         = aws_ecs_cluster.main.id
  task_definition = aws_ecs_task_definition.app.arn
  desired_count   = var.app_count
  launch_type     = "FARGATE"

  network_configuration {
    security_groups = [aws_security_group.ecs_tasks.id]
    subnets         = aws_subnet.private.*.id

    # Tasks sit in private subnets whose default route is a NAT gateway; a
    # public IP would be unreachable there anyway (no IGW route), so don't
    # assign one. Image pulls and log delivery still work via the NAT gateway.
    assign_public_ip = false
  }

  load_balancer {
    target_group_arn = aws_alb_target_group.app.id
    container_name   = "myapp"
    container_port   = var.app_port
  }

  # The listener must exist before targets can register, and the execution
  # role policy must be attached before tasks can pull images / write logs.
  depends_on = [aws_alb_listener.front_end, aws_iam_role_policy_attachment.ecs_task_execution_role]
}
# auto_scaling.tf
# Register the ECS service's DesiredCount as an Application Auto Scaling target.
resource "aws_appautoscaling_target" "target" {
  service_namespace  = "ecs"
  resource_id        = "service/${aws_ecs_cluster.main.name}/${aws_ecs_service.main.name}"
  scalable_dimension = "ecs:service:DesiredCount"

  # Keep the scaling floor in sync with the service's initial desired count
  # instead of duplicating the hard-coded 3 (identical with the default
  # app_count = 3).
  min_capacity = var.app_count
  max_capacity = 6
}
# Automatically scale capacity up by one
# The namespace/resource/dimension are read from the scaling target rather
# than repeated as literals; the references also create the dependency that
# the explicit depends_on used to declare.
resource "aws_appautoscaling_policy" "up" {
  name               = "myapp_scale_up"
  service_namespace  = aws_appautoscaling_target.target.service_namespace
  resource_id        = aws_appautoscaling_target.target.resource_id
  scalable_dimension = aws_appautoscaling_target.target.scalable_dimension

  step_scaling_policy_configuration {
    adjustment_type         = "ChangeInCapacity"
    cooldown                = 60
    metric_aggregation_type = "Maximum"

    step_adjustment {
      metric_interval_lower_bound = 0
      scaling_adjustment          = 1
    }
  }
}
# Automatically scale capacity down by one
# Mirror of the scale-up policy above: target attributes are referenced
# instead of duplicated, which also supplies the dependency ordering.
resource "aws_appautoscaling_policy" "down" {
  name               = "myapp_scale_down"
  service_namespace  = aws_appautoscaling_target.target.service_namespace
  resource_id        = aws_appautoscaling_target.target.resource_id
  scalable_dimension = aws_appautoscaling_target.target.scalable_dimension

  step_scaling_policy_configuration {
    adjustment_type         = "ChangeInCapacity"
    cooldown                = 60
    metric_aggregation_type = "Maximum"

    step_adjustment {
      metric_interval_upper_bound = 0
      scaling_adjustment          = -1
    }
  }
}
# CloudWatch alarm that triggers the autoscaling up policy
# Fires when average service CPU >= 85% for two consecutive 60s periods.
resource "aws_cloudwatch_metric_alarm" "service_cpu_high" {
alarm_name = "myapp_cpu_utilization_high"
comparison_operator = "GreaterThanOrEqualToThreshold"
evaluation_periods = "2"
metric_name = "CPUUtilization"
namespace = "AWS/ECS"
period = "60"
statistic = "Average"
threshold = "85"
dimensions = {
ClusterName = aws_ecs_cluster.main.name
ServiceName = aws_ecs_service.main.name
}
alarm_actions = [aws_appautoscaling_policy.up.arn]
}
# CloudWatch alarm that triggers the autoscaling down policy
# Fires when average service CPU <= 10% for two consecutive 60s periods.
resource "aws_cloudwatch_metric_alarm" "service_cpu_low" {
alarm_name = "myapp_cpu_utilization_low"
comparison_operator = "LessThanOrEqualToThreshold"
evaluation_periods = "2"
metric_name = "CPUUtilization"
namespace = "AWS/ECS"
period = "60"
statistic = "Average"
threshold = "10"
dimensions = {
ClusterName = aws_ecs_cluster.main.name
ServiceName = aws_ecs_service.main.name
}
alarm_actions = [aws_appautoscaling_policy.down.arn]
}
# logs.tf
# Set up CloudWatch group and log stream and retain logs for 30 days
resource "aws_cloudwatch_log_group" "myapp_log_group" {
  name              = "/ecs/myapp"
  retention_in_days = 30

  tags = {
    # Follow the file's myapp-* naming convention (was the leftover
    # "cb-log-group" from the upstream tutorial this repo derives from).
    Name = "myapp-log-group"
  }
}
# Log stream inside the group above.
# NOTE(review): the awslogs driver normally creates streams per task on its
# own — presumably this pre-created stream is for the template's log config;
# verify against templates/ecs/myapp.json.tpl.
resource "aws_cloudwatch_log_stream" "myapp_log_stream" {
name = "my-log-stream"
log_group_name = aws_cloudwatch_log_group.myapp_log_group.name
}
# outputs.tf
# Public DNS name of the ALB — browse to this to reach the application.
output "alb_hostname" {
value = aws_alb.main.dns_name
}