# hasher-matcher-actioner/terraform/main.tf
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
terraform {
required_providers {
aws = {
source = "hashicorp/aws"
version = "~> 3.0"
}
}
}
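# The "~> 3.0" constraint allows any 3.x release of the AWS provider
# (>= 3.0, < 4.0); `terraform init` typically selects the newest matching
# version.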
provider "aws" {
region = "us-east-1"
}
data "aws_region" "default" {}
locals {
common_tags = {
"HMAPrefix" = var.prefix
}
te_data_folder = module.hashing_data.threat_exchange_data_folder_info.key
te_api_token_secret_name = "threatexchange/${var.prefix}_api_tokens"
hma_api_tokens_secret_name = "hma/${var.prefix}_api_tokens"
durable_storage_path = "/mnt/durable-storage"
}
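# For illustration, with a hypothetical prefix = "hma" these locals resolve to:
#   te_api_token_secret_name   = "threatexchange/hma_api_tokens"
#   hma_api_tokens_secret_name = "hma/hma_api_tokens"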
### Config storage ###
resource "aws_dynamodb_table" "config_table" {
name = "${var.prefix}-HMAConfig"
billing_mode = "PAY_PER_REQUEST"
hash_key = "ConfigType"
range_key = "ConfigName"
attribute {
name = "ConfigType"
type = "S"
}
attribute {
name = "ConfigName"
type = "S"
}
tags = merge(
var.additional_tags,
{
Name = "HMAConfig"
}
)
}
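# A sketch of the item shape this schema implies (the type and name values
# below are hypothetical; the real ones are defined by hmalib):
#   { "ConfigType": "ThreatExchangeConfig", "ConfigName": "my_privacy_group", ...other attributes... }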
locals {
config_table = {
arn = aws_dynamodb_table.config_table.arn
name = aws_dynamodb_table.config_table.name
}
}
module "datastore" {
source = "./datastore"
prefix = var.prefix
additional_tags = merge(var.additional_tags, local.common_tags)
}
module "hashing_data" {
source = "./hashing-data"
prefix = var.prefix
additional_tags = merge(var.additional_tags, local.common_tags)
data_bucket = {
bucket_name = aws_s3_bucket.data_bucket.id
bucket_arn = aws_s3_bucket.data_bucket.arn
}
submissions_queue = {
queue_arn = aws_sqs_queue.submissions_queue.arn
queue_url = aws_sqs_queue.submissions_queue.id
}
}
module "indexer" {
source = "./indexer"
prefix = var.prefix
lambda_docker_info = {
uri = var.hma_lambda_docker_uri
commands = {
indexer = "hmalib.lambdas.unified_indexer.lambda_handler"
}
}
threat_exchange_data = {
bucket_name = module.hashing_data.threat_exchange_data_folder_info.bucket_name
data_folder = local.te_data_folder
notification_topic = module.hashing_data.threat_exchange_data_folder_info.notification_topic
}
index_data_storage = {
bucket_name = module.hashing_data.index_folder_info.bucket_name
index_folder_key = module.hashing_data.index_folder_info.key
}
banks_datastore = module.datastore.banks_datastore
log_retention_in_days = var.log_retention_in_days
additional_tags = merge(var.additional_tags, local.common_tags)
measure_performance = var.measure_performance
indexer_frequency = var.indexer_frequency
config_table = local.config_table
}
module "counters" {
# DynamoDB does not necessarily [1] send the table name to the stream processor.
# So instead of relying on it, we configure multiple lambdas using the same
# underlying code. Since there is a lot to configure per lambda, we replicate
# the module instead of individual resources inside the module.
# 1: https://stackoverflow.com/questions/35278881/how-to-get-the-table-name-in-aws-dynamodb-trigger-function
for_each = {
HMADataStore = module.datastore.primary_datastore.stream_arn
HMABanks = module.datastore.banks_datastore.stream_arn
}
source = "./counters"
prefix = var.prefix
additional_tags = merge(var.additional_tags, local.common_tags)
lambda_docker_info = {
uri = var.hma_lambda_docker_uri
commands = {
ddb_stream_counter = "hmalib.lambdas.ddb_stream_counter.lambda_handler"
}
}
source_stream_arn = each.value
source_table_type = each.key
counts_datastore = {
name = module.datastore.counts_datastore.name
arn = module.datastore.counts_datastore.arn
}
log_retention_in_days = var.log_retention_in_days
measure_performance = var.measure_performance
}
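# The for_each above expands into two module instances, one per stream:
#   module.counters["HMADataStore"]  # each.key = "HMADataStore", each.value = primary datastore stream ARN
#   module.counters["HMABanks"]      # each.key = "HMABanks",     each.value = banks datastore stream ARN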
module "fetcher" {
source = "./fetcher"
prefix = var.prefix
lambda_docker_info = {
uri = var.hma_lambda_docker_uri
commands = {
fetcher = "hmalib.lambdas.fetcher.lambda_handler"
}
}
datastore = module.datastore.primary_datastore
threat_exchange_data = {
bucket_name = module.hashing_data.threat_exchange_data_folder_info.bucket_name
data_folder = local.te_data_folder
}
log_retention_in_days = var.log_retention_in_days
additional_tags = merge(var.additional_tags, local.common_tags)
fetch_frequency = var.fetch_frequency
te_api_token_secret = aws_secretsmanager_secret.te_api_token
config_table = local.config_table
}
resource "aws_sns_topic" "matches" {
name_prefix = "${var.prefix}-matches"
}
# Set up webapp resources (S3 bucket and CloudFront distribution)
module "webapp" {
source = "./webapp"
prefix = var.prefix
organization = var.organization
include_cloudfront_distribution = var.include_cloudfront_distribution
}
/**
* # Authentication:
* Authentication is currently handled in two ways:
* 1) a list of permanent access tokens stored in AWS Secrets Manager
* 2) user access via a dedicated or shared Cognito user pool
*
* Both methods are validated in a lambda (module.api.aws_lambda_function.api_auth)
* before requests are sent along to the rest of the system.
*/
resource "aws_secretsmanager_secret" "hma_api_tokens" {
name = local.hma_api_tokens_secret_name
recovery_window_in_days = 0
}
resource "aws_secretsmanager_secret_version" "hma_api_tokens" {
secret_id = aws_secretsmanager_secret.hma_api_tokens.id
secret_string = jsonencode(var.integration_api_access_tokens)
}
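# Sketch: with hypothetical tokens in terraform.tfvars,
#   integration_api_access_tokens = ["token-a", "token-b"]
# jsonencode stores the secret_string as the JSON array ["token-a","token-b"].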
# Set up Cognito for authenticating the webapp and API (unless a shared setup is indicated in terraform.tfvars)
module "authentication" {
source = "./authentication"
prefix = var.prefix
organization = var.organization
use_cloudfront_distribution_url = var.include_cloudfront_distribution && !var.use_shared_user_pool
cloudfront_distribution_url = "https://${module.webapp.cloudfront_distribution_domain_name}"
use_shared_user_pool = var.use_shared_user_pool
webapp_and_api_shared_user_pool_id = var.webapp_and_api_shared_user_pool_id
webapp_and_api_shared_user_pool_client_id = var.webapp_and_api_shared_user_pool_client_id
}
module "durable_fs" {
source = "./durable-fs"
prefix = var.prefix
additional_tags = var.additional_tags
}
/**
* # Primary S3 Bucket:
* Jack-of-all-trades S3 bucket. Used for storing raw data from threatexchange,
* checkpoints, and upload-type media submissions.
*
* Inside another module (hashing-data), we create a couple of notification
* configs on this bucket.
*/
resource "aws_s3_bucket" "data_bucket" {
bucket_prefix = "${var.prefix}-hashing-data"
acl = "private"
tags = merge(
var.additional_tags,
{
Name = "HashingDataBucket"
}
)
cors_rule {
allowed_headers = ["*"]
allowed_methods = ["PUT"]
allowed_origins = ["*"]
max_age_seconds = 3000
}
versioning {
enabled = true
}
# For development, this makes cleanup easier.
# If deploying for real, this should not be used.
# Could also be set with a variable; see the sketch after this resource.
force_destroy = true
}
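# A minimal sketch of the variable-driven alternative mentioned above (the
# variable name is hypothetical and not defined in variables.tf):
#
#   variable "bucket_force_destroy" {
#     type    = bool
#     default = false
#   }
#
#   # then, inside aws_s3_bucket.data_bucket:
#   force_destroy = var.bucket_force_destroy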
/**
* # Banks S3 Media Bucket:
* Stores raw media uploaded by bank admins. Should ideally be closed to the
* wider internet and can have retention rules based on legal policy.
*/
resource "aws_s3_bucket" "banks_media_bucket" {
bucket_prefix = "${var.prefix}-banks-media-"
acl = "private"
tags = merge(
var.additional_tags,
{
Name = "BanksMediaBucket"
}
)
versioning {
# We don't see a reason to version media: everything will be keyed on a uuid
# and the current date.
enabled = false
}
cors_rule {
# The webapp uploads media to this bucket directly.
allowed_headers = ["*"]
allowed_methods = ["PUT"]
# Partners could lock this down to specific origins where possible; see the
# sketch after this resource.
allowed_origins = ["*"]
max_age_seconds = 3000
}
lifecycle {
# Prevents execution of plans that would cause this bucket to be destroyed.
# Once in the hands of partners, we have to be extra careful not to
# accidentally delete their data.
prevent_destroy = true
}
}
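# Sketch of the locked-down cors_rule suggested above (the origin is a
# hypothetical partner webapp domain):
#
#   cors_rule {
#     allowed_headers = ["*"]
#     allowed_methods = ["PUT"]
#     allowed_origins = ["https://webapp.partner.example"]
#     max_age_seconds = 3000
#   }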
/*
* # Submissions SQS:
* Submissions from the API are routed directly into a queue. An SNS
* indirection **could** allow multiple lambdas to listen for submissions,
* but that would be costly because each lambda invocation costs money.
*
* Instead, we will have a single hashing lambda capable of handling all
* content_types. If the content, because of its size or compute complexity,
* can't be handled by this "base" lambda, it will be routed to another,
* specially capable lambda's queue.
*
* - This should soon absorb the pdq_images queue + SNS topic as the only
*   queue that we publish submissions on. Once we have proven that the
*   generic lambda can generate PDQ signals, we can do away with the
*   PDQ-specific infrastructure altogether.
*/
resource "aws_sqs_queue" "submissions_queue_dlq" {
name_prefix = "${var.prefix}-submissions-deadletter-"
visibility_timeout_seconds = 300
message_retention_seconds = var.deadletterqueue_message_retention_seconds
tags = merge(
var.additional_tags,
local.common_tags,
{
Name = "SubmissionDLQ"
}
)
}
resource "aws_sqs_queue" "submissions_queue" {
name_prefix = "${var.prefix}-submissions"
visibility_timeout_seconds = 300
redrive_policy = jsonencode({
deadLetterTargetArn = aws_sqs_queue.submissions_queue_dlq.arn
maxReceiveCount = 4
})
tags = merge(
var.additional_tags,
local.common_tags,
{
Name = "SubmissionsQueue"
}
)
}
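# The redrive_policy above renders to JSON like the following (ARN
# abbreviated); after 4 receives without a successful delete, SQS moves the
# message to the dead-letter queue:
#   {"deadLetterTargetArn":"arn:aws:sqs:...:...-submissions-deadletter-...","maxReceiveCount":4}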
resource "aws_sqs_queue" "hashes_queue_dlq" {
name_prefix = "${var.prefix}-hashes-deadletter-"
visibility_timeout_seconds = 300
message_retention_seconds = var.deadletterqueue_message_retention_seconds
tags = merge(
var.additional_tags,
local.common_tags,
{
Name = "HashesDLQ"
}
)
}
resource "aws_sqs_queue" "hashes_queue" {
name_prefix = "${var.prefix}-hashes"
visibility_timeout_seconds = 300
redrive_policy = jsonencode({
deadLetterTargetArn = aws_sqs_queue.hashes_queue_dlq.arn
maxReceiveCount = 4
})
tags = merge(
var.additional_tags,
local.common_tags,
{
Name = "HashesQueue"
}
)
}
module "hasher" {
source = "./hasher"
prefix = var.prefix
lambda_docker_info = {
uri = var.hma_lambda_docker_uri
}
datastore = module.datastore.primary_datastore
banks_datastore = module.datastore.banks_datastore
submissions_queue = {
arn = aws_sqs_queue.submissions_queue.arn
}
hashes_queue = {
arn = aws_sqs_queue.hashes_queue.arn
url = aws_sqs_queue.hashes_queue.id
}
image_data_storage = {
bucket_name = module.hashing_data.image_folder_info.bucket_name
image_prefix = module.hashing_data.image_folder_info.key
all_bucket_arns = concat(
[
"arn:aws:s3:::${module.hashing_data.image_folder_info.bucket_name}/${module.hashing_data.image_folder_info.key}*"
],
[for partner_bucket in var.partner_image_buckets : "${partner_bucket.arn}/*"]
)
}
durable_fs_subnet_ids = module.durable_fs.durable_fs_subnet_ids
durable_fs_security_group_ids = module.durable_fs.durable_fs_security_group_ids
durable_fs_arn = module.durable_fs.durable_fs_arn
durable_fs_local_mount_path = local.durable_storage_path
log_retention_in_days = var.log_retention_in_days
additional_tags = merge(var.additional_tags, local.common_tags)
config_table = local.config_table
measure_performance = var.measure_performance
}
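# Sketch of the resulting all_bucket_arns above (bucket names and the image
# folder key are hypothetical):
#   [
#     "arn:aws:s3:::hma-hashing-data.../images/*",
#     "arn:aws:s3:::partner-bucket/*",  # one entry per var.partner_image_buckets
#   ]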
module "matcher" {
source = "./matcher"
prefix = var.prefix
lambda_docker_info = {
uri = var.hma_lambda_docker_uri
}
datastore = module.datastore.primary_datastore
hashes_queue = {
arn = aws_sqs_queue.hashes_queue.arn
url = aws_sqs_queue.hashes_queue.id
}
matches_topic_arn = aws_sns_topic.matches.arn
banks_datastore = module.datastore.banks_datastore
index_data_storage = {
bucket_name = module.hashing_data.index_folder_info.bucket_name
index_folder_key = module.hashing_data.index_folder_info.key
}
log_retention_in_days = var.log_retention_in_days
additional_tags = merge(var.additional_tags, local.common_tags)
config_table = local.config_table
measure_performance = var.measure_performance
}
# Set up the API
module "api" {
source = "./api"
prefix = var.prefix
api_and_webapp_user_pool_id = module.authentication.webapp_and_api_user_pool_id
api_authorizer_audience = module.authentication.webapp_and_api_user_pool_client_id
lambda_docker_info = {
uri = var.hma_lambda_docker_uri
commands = {
api_root = "hmalib.lambdas.api.api_root.lambda_handler"
api_auth = "hmalib.lambdas.api.api_auth.lambda_handler"
}
}
datastore = module.datastore.primary_datastore
banks_datastore = module.datastore.banks_datastore
counts_datastore = module.datastore.counts_datastore
image_data_storage = {
bucket_name = module.hashing_data.image_folder_info.bucket_name
image_prefix = module.hashing_data.image_folder_info.key
}
index_data_storage = {
bucket_name = module.hashing_data.index_folder_info.bucket_name
index_folder_key = module.hashing_data.index_folder_info.key
}
threat_exchange_data = {
bucket_name = module.hashing_data.threat_exchange_data_folder_info.bucket_name
data_folder = local.te_data_folder
}
banks_media_storage = {
bucket_name = aws_s3_bucket.banks_media_bucket.id
bucket_arn = aws_s3_bucket.banks_media_bucket.arn
}
log_retention_in_days = var.log_retention_in_days
additional_tags = merge(var.additional_tags, local.common_tags)
config_table = local.config_table
measure_performance = var.measure_performance
te_api_token_secret = aws_secretsmanager_secret.te_api_token
hma_api_access_tokens_secret = aws_secretsmanager_secret.hma_api_tokens
writebacks_queue = module.actions.writebacks_queue
indexer_function_name = module.indexer.indexer_function_name
indexer_function_arn = module.indexer.indexer_function_arn
hashes_queue = {
url = aws_sqs_queue.hashes_queue.id,
arn = aws_sqs_queue.hashes_queue.arn
}
submissions_queue = {
url = aws_sqs_queue.submissions_queue.id,
arn = aws_sqs_queue.submissions_queue.arn
}
partner_image_buckets = var.partner_image_buckets
enable_partner_upload_notification = var.enable_partner_upload_notification
api_in_vpc = var.api_in_vpc
vpc_id = var.vpc_id
vpc_subnets = var.vpc_subnets
security_groups = var.security_groups
}
# Build and deploy webapp
locals {
dashboard_name = "${var.prefix}-dashboard"
aws_dashboard_url = var.measure_performance ? "https://console.aws.amazon.com/cloudwatch/home?region=${data.aws_region.default.name}#dashboards:name=${local.dashboard_name}" : ""
}
resource "local_file" "webapp_env" {
depends_on = [
module.api.invoke_url,
module.authentication.webapp_and_api_user_pool_id,
module.authentication.webapp_and_api_user_pool_client_id
]
sensitive_content = "REACT_APP_AWS_DASHBOARD_URL=${local.aws_dashboard_url}\nREACT_APP_REGION=${data.aws_region.default.name}\nREACT_APP_USER_POOL_ID=${module.authentication.webapp_and_api_user_pool_id}\nREACT_APP_USER_POOL_APP_CLIENT_ID=${module.authentication.webapp_and_api_user_pool_client_id}\nREACT_APP_HMA_API_ENDPOINT=${module.api.invoke_url}\n"
filename = "../webapp/.env"
}
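# The rendered ../webapp/.env looks like this (all values hypothetical):
#   REACT_APP_AWS_DASHBOARD_URL=https://console.aws.amazon.com/cloudwatch/home?region=us-east-1#dashboards:name=hma-dashboard
#   REACT_APP_REGION=us-east-1
#   REACT_APP_USER_POOL_ID=us-east-1_ExampleId
#   REACT_APP_USER_POOL_APP_CLIENT_ID=exampleclientid
#   REACT_APP_HMA_API_ENDPOINT=https://example.execute-api.us-east-1.amazonaws.com/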
resource "null_resource" "build_and_deploy_webapp" {
depends_on = [
module.webapp.s3_bucket_name,
local_file.webapp_env
]
provisioner "local-exec" {
command = "npm install --silent"
working_dir = "../webapp"
}
provisioner "local-exec" {
command = "npm run build"
working_dir = "../webapp"
}
provisioner "local-exec" {
command = "aws s3 sync ../webapp/build s3://${module.webapp.s3_bucket_name} --acl ${var.include_cloudfront_distribution ? "private" : "public-read"}"
}
}
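# Note: local-exec provisioners only run when this null_resource is created.
# To rebuild and redeploy the webapp on demand, recreate the resource, e.g. on
# Terraform 0.15.2+ (older versions: `terraform taint`):
#   terraform apply -replace="null_resource.build_and_deploy_webapp"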
module "actions" {
source = "./actions"
prefix = var.prefix
lambda_docker_info = {
uri = var.hma_lambda_docker_uri
commands = {
action_evaluator = "hmalib.lambdas.actions.action_evaluator.lambda_handler"
action_performer = "hmalib.lambdas.actions.action_performer.lambda_handler"
writebacker = "hmalib.lambdas.actions.writebacker.lambda_handler"
}
}
matches_sns_topic_arn = aws_sns_topic.matches.arn
log_retention_in_days = var.log_retention_in_days
additional_tags = merge(var.additional_tags, local.common_tags)
measure_performance = var.measure_performance
te_api_token_secret = aws_secretsmanager_secret.te_api_token
config_table = {
name = aws_dynamodb_table.config_table.name
arn = aws_dynamodb_table.config_table.arn
}
datastore = module.datastore.primary_datastore
queue_batch_size = var.set_sqs_windows_to_min ? 10 : 100
queue_window_in_seconds = var.set_sqs_windows_to_min ? 0 : 30
deadletterqueue_message_retention_seconds = var.deadletterqueue_message_retention_seconds
}
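# The two ternaries above map var.set_sqs_windows_to_min as:
#   true  -> batch size 10,  window 0s  (low latency, more lambda invocations)
#   false -> batch size 100, window 30s (higher latency, fewer invocations)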
### ThreatExchange API Token Secret ###
resource "aws_secretsmanager_secret" "te_api_token" {
name = local.te_api_token_secret_name
recovery_window_in_days = 0
}
resource "aws_secretsmanager_secret_version" "te_api_token" {
secret_id = aws_secretsmanager_secret.te_api_token.id
secret_string = var.te_api_token
}
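# Sketch: the stored token can be read back with the AWS CLI (secret name
# shown for a hypothetical prefix "hma"):
#   aws secretsmanager get-secret-value --secret-id threatexchange/hma_api_tokens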
### New Submission Path (Use SNS topic instead of HTTP API) ###
module "submit_events" {
count = var.create_submit_event_sns_topic_and_handler ? 1 : 0
source = "./submit_events"
prefix = var.prefix
lambda_docker_info = {
uri = var.hma_lambda_docker_uri
commands = {
submit_event_handler = "hmalib.lambdas.submit_event_handler.lambda_handler"
}
}
datastore = module.datastore.primary_datastore
log_retention_in_days = var.log_retention_in_days
additional_tags = merge(var.additional_tags, local.common_tags)
submissions_queue = {
url = aws_sqs_queue.submissions_queue.id,
arn = aws_sqs_queue.submissions_queue.arn
}
partner_image_buckets = var.partner_image_buckets
deadletterqueue_message_retention_seconds = var.deadletterqueue_message_retention_seconds
}
### Basic Dashboard ###
locals {
# module.submit_events may or may not be initialized (it uses count), so we
# check its length before indexing into it; see the sketch after this block.
is_submit_event_module_initialized = length(module.submit_events) != 0
}
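# Because the module uses count, module.submit_events is a list:
#   count = 0 -> module.submit_events == []               (length 0)
#   count = 1 -> module.submit_events[0] is the instance  (length 1)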
module "dashboard" {
count = var.measure_performance ? 1 : 0
depends_on = [
module.api.api_root_function_name,
module.datastore.primary_datastore,
]
name = local.dashboard_name
source = "./dashboard"
prefix = var.prefix
datastore = module.datastore.primary_datastore
pipeline_lambdas = [
(["Hash", module.hasher.hasher_function_name]),
(["Match", module.matcher.matcher_function_name]),
(["Action Evaluator", module.actions.action_evaluator_function_name]),
(["Action Performer", module.actions.action_performer_function_name])
] # Does not currently include the fetcher, indexer, writebacker, and counter functions
api_lambda_name = module.api.api_root_function_name
auth_lambda_name = module.api.api_auth_function_name
other_lambdas = concat([
module.fetcher.fetcher_function_name,
module.indexer.indexer_function_name,
module.actions.writebacker_function_name,
module.api.api_auth_function_name,
module.api.api_root_function_name,
], # all lambdas not given as pipeline_lambdas
# optionally add submit module lambda if initialized.
local.is_submit_event_module_initialized ? [module.submit_events[0].submit_event_handler_function_name] : []
)
queues_to_monitor = [
(["ImageQueue", aws_sqs_queue.submissions_queue.name, aws_sqs_queue.submissions_queue_dlq.name]),
(["HashQueue", aws_sqs_queue.hashes_queue.name, aws_sqs_queue.hashes_queue_dlq.name]),
(["MatchQueue", module.actions.matches_queue_name, module.actions.matches_dlq_name]),
(["ActionQueue", module.actions.actions_queue_name, module.actions.actions_dlq_name])
] # Could also monitor SNS topics
submit_event_lambda_name = local.is_submit_event_module_initialized ? module.submit_events[0].submit_event_handler_function_name : null
submit_event_queue = local.is_submit_event_module_initialized ? (["SubmitEventQueue", module.submit_events[0].submit_event_queue_name, module.submit_events[0].submit_event_dlq_name]) : null
api_gateway_id = module.api.api_gateway_id
}