internal/pkg/docker/dockerfile/lex.go (171 lines of code) (raw):
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0
package dockerfile
import (
"bufio"
"fmt"
"io"
"strings"
"unicode"
)
// instructionName identifies the kind of instruction reported by the lexer.
// Its zero value is instrErr, so a zero-valued instruction reads as an error.
type instructionName int
// Kinds of instructions that the lexer can emit.
// The values are assigned with iota, so their order must not change casually.
const (
instrErr instructionName = iota // an error occurred while scanning; the instruction's args hold the error text.
instrHealthCheck // a HEALTHCHECK instruction.
instrExpose // an EXPOSE instruction.
instrEOF // done scanning; emitted when the end of the Dockerfile is reached.
)
// Prefixes used to recognize the instructions of interest.
// They are lowercase because lines are lowercased before being compared, and they end with
// a space, so a keyword is only recognized when a space character follows it.
const (
markerExposeInstr = "expose " // start of an EXPOSE instruction.
markerHealthCheckInstr = "healthcheck " // start of a HEALTHCHECK instruction.
)
var (
// lineContinuationMarkers lists the suffixes that make a line wrap to the next one.
// NOTE(review): both markers are always honored; nothing in this file reads Docker's
// `# escape=` parser directive, so a line that merely ends with a backtick is treated
// as continued — confirm this is acceptable.
lineContinuationMarkers = []string{"`", "\\"} // denotes that the instruction continues to the next line.
// instrMarkers maps an instruction kind to the lowercase prefix that starts it.
// Only the kinds that carry arguments (EXPOSE and HEALTHCHECK) have an entry.
instrMarkers = map[instructionName]string{ // lookup table for how an instruction starts.
instrExpose: markerExposeInstr,
instrHealthCheck: markerHealthCheckInstr,
}
)
// An instruction is a single entry produced by the lexer while scanning a Dockerfile.
// Dockerfiles are of the following format:
// ```
// # Comment
// INSTRUCTION arguments
// ```
type instruction struct {
name instructionName // the type of the instruction.
args string // the arguments of an instruction; for instrErr this is the error message.
// line is the value of the lexer's line counter at the moment the instruction is emitted.
// NOTE(review): for an instruction that spans several lines this is the number of its
// last line, not its first, because emit runs after the continuation lines are read.
line int
}
// lexer holds the state of the scanner.
// The state is advanced by the goroutine started in lex; clients only receive
// the discovered instructions through next.
type lexer struct {
scanner *bufio.Scanner // line-by-line scanner of the contents of the Dockerfile.
curLineCount int // number of lines read so far, i.e. the 1-based number of curLine.
curLine string // current line scanned.
curArgs *strings.Builder // accumulated arguments for an instruction; reset after each emit.
instructions chan instruction // channel of discovered instructions.
}
// lex creates a lexer for the Dockerfile read from reader and starts scanning it
// in a separate goroutine. Instructions are retrieved one at a time with next.
//
// The instructions channel is unbuffered, so the scanning goroutine waits on every
// emitted instruction until next receives it.
//
// The lexing logic is heavily inspired by:
// https://cs.opensource.google/go/go/+/refs/tags/go1.17.1:src/text/template/parse/lex.go
func lex(reader io.Reader) *lexer {
	l := new(lexer)
	l.scanner = bufio.NewScanner(reader)
	l.curArgs = &strings.Builder{}
	l.instructions = make(chan instruction)

	go l.run()
	return l
}
// next blocks until the lexer produces its next instruction and returns it.
// Once the lexer has finished and closed its channel, next returns the zero
// instruction, whose name is instrErr.
func (lex *lexer) next() instruction {
	instr := <-lex.instructions
	return instr
}
// readLine advances the scanner by one line.
// On success the line is stored in curLine, curLineCount is incremented, and
// (false, nil) is returned.
// When the input is exhausted, isEOF is true and the lexer state is left untouched.
// When the scanner stopped because of an error (for instance a read failure of the
// underlying reader), that error is returned and isEOF is false.
func (lex *lexer) readLine() (isEOF bool, err error) {
	if lex.scanner.Scan() {
		lex.curLine = lex.scanner.Text()
		lex.curLineCount++
		return false, nil
	}
	// Scan returned false: either a genuine failure or simply the end of the input.
	if scanErr := lex.scanner.Err(); scanErr != nil {
		return false, scanErr
	}
	return true, nil
}
// emit sends an instruction of the given kind to the client.
// The instruction carries the arguments accumulated so far and the current line count.
// The arguments buffer is cleared once the client has received the instruction.
func (lex *lexer) emit(name instructionName) {
	found := instruction{
		line: lex.curLineCount,
		args: lex.curArgs.String(),
		name: name,
	}
	defer lex.curArgs.Reset()
	lex.instructions <- found
}
// emitErr reports a scanning failure to the client as an instrErr instruction.
// The error message is carried in the args field, together with the current line count.
func (lex *lexer) emitErr(err error) {
	failure := instruction{
		line: lex.curLineCount,
		args: err.Error(),
		name: instrErr,
	}
	lex.instructions <- failure
}
// consumeInstr reads the remaining lines of an instruction that wraps over several lines.
//
// Each line has its leading whitespace and its continuation marker removed and is then
// appended to the accumulated arguments, preceded by a single space. Whitespace that sits
// in front of a continuation marker is kept. Reading stops at the first line without a
// continuation marker: the instruction is emitted and lexing resumes with lexContent.
//
// For example a healthcheck instruction like:
//
//	HEALTHCHECK --interval=5m --timeout=3s --start-period=2s\
//	  --retries=3 \
//	  CMD curl -f http://localhost/ || exit 1
//
// ends up as one argument string holding the flags of the first two lines followed by
// the CMD of the third.
//
// If the scanner fails, or the file ends while a continuation is still pending, an error
// instruction is emitted and nil is returned to stop the lexer.
func (lex *lexer) consumeInstr(name instructionName) stateFn {
	for {
		isEOF, err := lex.readLine()
		switch {
		case err != nil:
			lex.emitErr(err)
			return nil
		case isEOF:
			lex.emitErr(fmt.Errorf("unexpected EOF while reading Dockerfile at line %d", lex.curLineCount))
			return nil
		}

		part := trimContinuationLineMarker(trimLeadingWhitespaces(lex.curLine))
		// Separate each new line with a space character.
		if _, err := lex.curArgs.WriteString(" " + part); err != nil {
			lex.emitErr(fmt.Errorf("write '%s' to arguments buffer: %w", part, err))
			return nil
		}

		if !hasLineContinuationMarker(lex.curLine) {
			lex.emit(name)
			return lexContent
		}
		// The line wraps again: keep reading.
	}
}
// run drives the lexer's state machine, starting at lexContent, until a state returns nil.
// It then closes the instructions channel to signal that nothing more will be emitted.
func (lex *lexer) run() {
	state := stateFn(lexContent)
	for state != nil {
		state = state(lex)
	}
	close(lex.instructions)
}
// stateFn represents a state machine transition of the scanner going from one INSTRUCTION to the next.
// A stateFn performs its work on the lexer and returns the state to run next;
// returning nil stops the state machine.
type stateFn func(*lexer) stateFn
// lexContent reads one line of the Dockerfile and decides which state comes next.
//
// A line whose first non-space text is "expose " or "healthcheck " (in any letter case)
// hands over to lexExpose or lexHealthCheck. Every other line is consumed without emitting
// anything and lexContent runs again. Note that the keyword must be followed by a space
// character, and that each line is examined on its own, including lines that continue
// an instruction which is being skipped.
//
// At the end of the file instrEOF is emitted and nil is returned; a scanner failure
// is emitted as an error and also ends the state machine.
func lexContent(l *lexer) stateFn {
	isEOF, err := l.readLine()
	if err != nil {
		l.emitErr(err)
		return nil
	}
	if isEOF {
		l.emit(instrEOF)
		return nil
	}

	normalized := strings.ToLower(strings.TrimLeftFunc(l.curLine, unicode.IsSpace))
	if strings.HasPrefix(normalized, markerExposeInstr) {
		return lexExpose
	}
	if strings.HasPrefix(normalized, markerHealthCheckInstr) {
		return lexHealthCheck
	}
	// Not an instruction of interest: skip the line.
	return lexContent
}
// lexExpose is the state entered when the current line starts an EXPOSE instruction.
// It delegates to lexInstruction, which gathers the arguments and emits the instruction.
func lexExpose(l *lexer) stateFn {
	next := lexInstruction(l, instrExpose)
	return next
}
// lexHealthCheck is the state entered when the current line starts a HEALTHCHECK instruction.
// It delegates to lexInstruction, which gathers the arguments and emits the instruction.
func lexHealthCheck(l *lexer) stateFn {
	next := lexInstruction(l, instrHealthCheck)
	return next
}
// lexInstruction starts collecting the arguments of the named instruction from the current line.
//
// The instruction keyword and any continuation marker are stripped from the line and the
// remainder is stored as the beginning of the arguments. If the line wraps, the rest is
// gathered by consumeInstr; otherwise the instruction is emitted right away and lexing
// resumes with lexContent. A failed write to the arguments buffer is emitted as an error
// and stops the lexer.
func lexInstruction(l *lexer, name instructionName) stateFn {
	marker := instrMarkers[name]
	args := trimContinuationLineMarker(trimInstruction(l.curLine, marker))
	if _, err := l.curArgs.WriteString(args); err != nil {
		l.emitErr(fmt.Errorf("write '%s' to arguments buffer: %w", args, err))
		return nil
	}

	if !hasLineContinuationMarker(l.curLine) {
		l.emit(name)
		return lexContent
	}
	return l.consumeInstr(name)
}
// hasLineContinuationMarker returns true if the line wraps to the next line, that is if it
// ends with one of lineContinuationMarkers.
// Spaces and tabs after the marker are ignored, so a line such as "EXPOSE 80 \  " still wraps;
// this mirrors Docker, which tolerates trailing blanks after the escape character.
func hasLineContinuationMarker(line string) bool {
	line = strings.TrimRight(line, " \t")
	for _, marker := range lineContinuationMarkers {
		if strings.HasSuffix(line, marker) {
			return true
		}
	}
	return false
}

// trimInstruction returns the part of line that follows the first occurrence of instrMarker.
// The marker is matched without regard to letter case.
// If line does not contain the marker, line is returned unchanged.
func trimInstruction(line, instrMarker string) string {
	// Compare windows of the original line instead of searching a lowercased copy:
	// lowercasing can change the byte length of some characters, which would make
	// offsets found in the copy invalid for slicing the original.
	n := len(instrMarker)
	for start := 0; start+n <= len(line); start++ {
		if strings.EqualFold(line[start:start+n], instrMarker) {
			return line[start+n:]
		}
	}
	return line
}

// trimContinuationLineMarker returns the line without its continuation line marker.
// Spaces and tabs that follow the marker are removed together with it, so the result
// agrees with hasLineContinuationMarker; whitespace in front of the marker is kept.
// If the line doesn't have a continuation marker, then returns it as is.
func trimContinuationLineMarker(line string) string {
	stripped := strings.TrimRight(line, " \t")
	for _, marker := range lineContinuationMarkers {
		if strings.HasSuffix(stripped, marker) {
			return strings.TrimSuffix(stripped, marker)
		}
	}
	return line
}
// trimLeadingWhitespaces returns line without the white space at its beginning,
// where white space is whatever unicode.IsSpace reports as such.
func trimLeadingWhitespaces(line string) string {
	isBlank := unicode.IsSpace
	trimmed := strings.TrimLeftFunc(line, isBlank)
	return trimmed
}