public constructor()

in src/backend/orchestration/index.ts [154:356]


  /**
   * Wires up the backend orchestration resources:
   *
   * - a KMS-encrypted dead-letter queue (DLQ) and a high-severity alarm that
   *   fires as soon as the queue holds any message (visible or in flight);
   * - the catalog builder, and a "needs catalog update" check that gates it;
   * - the ECS cluster and the transliterator task used to generate docs;
   * - the state machine chaining doc generation and the catalog update, with
   *   a high-severity alarm on failed executions;
   * - a redrive function for the DLQ, and the "regenerate all documentation"
   *   workflow.
   *
   * @param scope the construct this orchestration is defined in.
   * @param id    the construct ID of this orchestration within `scope`.
   * @param props the orchestration properties (this constructor reads
   *              `monitoring`, `bucket`, `vpc`, `vpcSubnets` and
   *              `vpcSecurityGroups`, and forwards `props` as a whole to the
   *              catalog builder and the transliterator).
   */
  public constructor(scope: Construct, id: string, props: OrchestrationProps) {
    super(scope, id);

    this.deadLetterQueue = new Queue(this, 'DLQ', {
      encryption: QueueEncryption.KMS_MANAGED,
      retentionPeriod: Duration.days(14),
      visibilityTimeout: Duration.minutes(15),
    });

    // Alarm on the sum of visible and not-visible messages, so that messages
    // currently in flight still count towards "queue is not empty".
    props.monitoring.addHighSeverityAlarm(
      'Backend Orchestration Dead-Letter Queue is not empty',
      new MathExpression({
        expression: 'm1 + m2',
        label: 'Dead-Letter Queue not empty',
        usingMetrics: {
          m1: this.deadLetterQueue.metricApproximateNumberOfMessagesVisible({ period: Duration.minutes(1) }),
          m2: this.deadLetterQueue.metricApproximateNumberOfMessagesNotVisible({ period: Duration.minutes(1) }),
        },
      }).createAlarm(this, 'DLQAlarm', {
        alarmName: `${this.deadLetterQueue.node.path}/NotEmpty`,
        alarmDescription: [
          'Backend orchestration dead-letter queue is not empty.',
          '',
          `RunBook: ${RUNBOOK_URL}`,
          '',
          `Direct link to queue: ${sqsQueueUrl(this.deadLetterQueue)}`,
          'Warning: State Machines executions that sent messages to the DLQ will not show as "failed".',
        ].join('\n'),
        comparisonOperator: ComparisonOperator.GREATER_THAN_OR_EQUAL_TO_THRESHOLD,
        evaluationPeriods: 1,
        threshold: 1,
      }),
    );

    // Shared error sink: sends the whole state input to the DLQ, then ends the
    // execution with a Succeed state (hence the warning in the alarm above).
    const sendToDeadLetterQueue = new tasks.SqsSendMessage(this, 'Send to Dead Letter Queue', {
      messageBody: TaskInput.fromJsonPathAt('$'),
      queue: this.deadLetterQueue,
      resultPath: JsonPath.DISCARD,
    }).next(new Succeed(this, 'Sent to DLQ'));

    // Target for errors that are caught without being sent to the DLQ.
    const ignore = new Pass(this, 'Ignore');

    this.catalogBuilder = new CatalogBuilder(this, 'CatalogBuilder', props);

    const addToCatalog = new tasks.LambdaInvoke(this, 'Add to catalog.json', {
      lambdaFunction: this.catalogBuilder.function,
      resultPath: '$.catalogBuilderOutput',
      resultSelector: {
        'ETag.$': '$.Payload.ETag',
        'VersionId.$': '$.Payload.VersionId',
      },
    })
      // This has a concurrency of 1, so we want to aggressively retry being throttled here.
      .addRetry({ errors: ['Lambda.TooManyRequestsException'], ...THROTTLE_RETRY_POLICY })
      .addCatch(sendToDeadLetterQueue, { errors: ['Lambda.TooManyRequestsException'], resultPath: '$.error' })
      .addCatch(sendToDeadLetterQueue, { errors: ['States.TaskFailed'], resultPath: '$.error' })
      .addCatch(sendToDeadLetterQueue, { errors: ['States.ALL'], resultPath: '$.error' });

    const needsCatalogUpdateFunction = new NeedsCatalogUpdate(this, 'NeedsCatalogUpdate', {
      architecture: gravitonLambdaIfAvailable(this),
      description: '[ConstructHub/Orchestration/NeedsCatalogUpdate] Determines whether a package version requires a catalog update',
      environment: { CATALOG_BUCKET_NAME: props.bucket.bucketName, CATALOG_OBJECT_KEY: CATALOG_KEY },
      memorySize: 1_024,
      timeout: Duration.minutes(1),
    });
    props.bucket.grantRead(needsCatalogUpdateFunction);

    // Check whether the catalog needs updating. If so, trigger addToCatalog.
    // NOTE(review): the state name below contains a typo ("udpating"). It is
    // kept as-is here because the name is part of the state machine definition;
    // rename it deliberately if desired.
    const addToCatalogIfNeeded = new tasks.LambdaInvoke(this, 'Check whether catalog needs udpating', {
      lambdaFunction: needsCatalogUpdateFunction,
      payloadResponseOnly: true,
      resultPath: '$.catalogNeedsUpdating',
    })
      .addRetry({
        errors: [
          'Lambda.TooManyRequestsException',
          'Lambda.Unknown', // happens when a lambda times out.
        ],
        ...THROTTLE_RETRY_POLICY,
      })
      .addCatch(sendToDeadLetterQueue, { errors: ['Lambda.TooManyRequestsException', 'Lambda.Unknown'], resultPath: '$.error' })
      .addCatch(sendToDeadLetterQueue, { errors: ['States.TaskFailed'], resultPath: '$.error' })
      .addCatch(sendToDeadLetterQueue, { errors: ['States.ALL'], resultPath: '$.error' })
      .next(new Choice(this, 'Is catalog update needed?')
        .when(Condition.booleanEquals('$.catalogNeedsUpdating', true), addToCatalog)
        .otherwise(new Succeed(this, 'Done')),
      );

    this.ecsCluster = new Cluster(this, 'Cluster', {
      containerInsights: true,
      enableFargateCapacityProviders: true,
      vpc: props.vpc,
    });

    this.transliterator = new Transliterator(this, 'Transliterator', props);

    // Main workflow: record execution details in the state, build the doc-gen
    // command from the state, run doc generation on ECS, then update the
    // catalog if needed.
    const definition = new Pass(this, 'Track Execution Infos', {
      inputPath: '$$.Execution',
      parameters: {
        'Id.$': '$.Id',
        'Name.$': '$.Name',
        'RoleArn.$': '$.RoleArn',
        'StartTime.$': '$.StartTime',
      },
      resultPath: '$.$TaskExecution',
    })
      .next(
        new Pass(this, 'Prepare doc-gen ECS Command', {
          // The command is a single-element array holding the JSON-encoded state.
          parameters: { 'command.$': 'States.Array(States.JsonToString($))' },
          resultPath: '$.docGen',
        }),
      )
      .next(
        this.transliterator.createEcsRunTask(this, 'Generate docs', {
          cluster: this.ecsCluster,
          inputPath: '$.docGen.command',
          resultPath: '$.docGenOutput',
          // aws-cdk-lib succeeds in roughly 1 hour, so this should give us
          // enough of a buffer and probably account for all other libraries out there.
          timeout: Duration.hours(2),
          vpcSubnets: props.vpcSubnets,
          securityGroups: props.vpcSecurityGroups,
        })
          // Do not retry NoSpaceLeftOnDevice errors, these are typically not transient.
          .addRetry({ errors: ['jsii-docgen.NoSpaceLeftOnDevice'], maxAttempts: 0 })
          .addRetry({
            errors: [
              'ECS.AmazonECSException', // Task failed starting, usually due to throttle / out of capacity
              'ECS.InvalidParameterException', // This is returned when ECS gets throttled when trying to access VPC/SGs.
              'jsii-docgen.NpmError.E429', // HTTP 429 ("Too Many Requests") from CodeArtifact's S3 bucket
              // The prefix must be 'jsii-docgen.' like the other doc-gen errors
              // listed here; a misspelled name would not match this retrier.
              'jsii-docgen.NpmError.EPROTO', // Sporadic TLS negotiation failures we see in logs, transient
            ],
            ...DOCGEN_THROTTLE_RETRY_POLICY,
          })
          .addRetry({
            errors: ['jsii-docgen.NpmError.ETARGET'], // Seen when dependencies aren't available yet
            // We'll wait longer between retries. This is to account for CodeArtifact's lag behind npm
            backoffRate: 2,
            interval: Duration.minutes(5),
            maxAttempts: 3,
          })
          // Generic retry for any error not matched by the retriers above.
          .addRetry({ maxAttempts: 3 })
          .addCatch(ignore, { errors: [UNPROCESSABLE_PACKAGE_ERROR_NAME] })
          .addCatch(sendToDeadLetterQueue, { errors: ['States.Timeout'], resultPath: '$.error' })
          .addCatch(sendToDeadLetterQueue, { errors: ['ECS.AmazonECSException', 'ECS.InvalidParameterException'], resultPath: '$.error' })
          .addCatch(sendToDeadLetterQueue, { errors: ['States.TaskFailed'], resultPath: '$.error' })
          .addCatch(sendToDeadLetterQueue, { errors: ['States.ALL'], resultPath: '$.error' })
          .next(addToCatalogIfNeeded),
      );

    this.stateMachine = new StateMachine(this, 'Resource', {
      definition,
      stateMachineName: stateMachineNameFrom(this.node.path),
      timeout: Duration.days(1), // Ample time for retries, etc...
      tracingEnabled: true,
    });

    if (props.vpc) {
      // Ensure the State Machine does not get to run before the VPC can be used.
      this.stateMachine.node.addDependency(props.vpc.internetConnectivityEstablished);
    }

    // Alarm as soon as a single execution of the state machine fails.
    props.monitoring.addHighSeverityAlarm(
      'Backend Orchestration Failed',
      this.stateMachine.metricFailed()
        .createAlarm(this, 'OrchestrationFailed', {
          alarmName: `${this.stateMachine.node.path}/${this.stateMachine.metricFailed().metricName}`,
          alarmDescription: [
            'Backend orchestration failed!',
            '',
            `RunBook: ${RUNBOOK_URL}`,
            '',
            `Direct link to state machine: ${stateMachineUrl(this.stateMachine)}`,
            'Warning: messages that resulted in a failed exectuion will NOT be in the DLQ!',
          ].join('\n'),
          comparisonOperator: ComparisonOperator.GREATER_THAN_OR_EQUAL_TO_THRESHOLD,
          evaluationPeriods: 1,
          threshold: 1,
        }));

    // This function is intended to be manually triggered by an operator to
    // attempt redriving messages from the DLQ.
    this.redriveFunction = new RedriveStateMachine(this, 'Redrive', {
      description: '[ConstructHub/Redrive] Manually redrives all messages from the backend dead letter queue',
      environment: {
        STATE_MACHINE_ARN: this.stateMachine.stateMachineArn,
        QUEUE_URL: this.deadLetterQueue.queueUrl,
      },
      memorySize: 1_024,
      timeout: Duration.minutes(15),
      tracing: Tracing.ACTIVE,
    });
    // The redrive function starts new executions and consumes DLQ messages.
    this.stateMachine.grantStartExecution(this.redriveFunction);
    this.deadLetterQueue.grantConsumeMessages(this.redriveFunction);

    // The workflow is intended to be manually triggered by an operator to
    // reprocess all package versions currently in store through the orchestrator.
    this.regenerateAllDocumentation = new RegenerateAllDocumentation(this, 'RegenerateAllDocumentation', {
      bucket: props.bucket,
      stateMachine: this.stateMachine,
    }).stateMachine;
  }